Upload 13 files
- .gitattributes +2 -0
- added_tokens.json +3983 -0
- config.json +46 -0
- configuration_indictrans.py +309 -0
- dict.SRC.json +0 -0
- dict.TGT.json +0 -0
- generation_config.json +7 -0
- model.SRC +3 -0
- model.TGT +3 -0
- model.safetensors +3 -0
- modeling_indictrans.py +1801 -0
- special_tokens_map.json +30 -0
- tokenization_indictrans.py +261 -0
- tokenizer_config.json +50 -0
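Taken together these files form a self-contained Hugging Face repository: config.json plus the three *_indictrans.py modules are loaded through `trust_remote_code`, while model.SRC, model.TGT, dict.SRC.json, dict.TGT.json, added_tokens.json, special_tokens_map.json and tokenizer_config.json back the custom tokenizer. A minimal loading sketch follows (the repository id is a placeholder, and the exact input format expected by the custom tokenizer, such as language-tag prefixes, is an assumption to check against tokenization_indictrans.py):

```python
# Minimal sketch, not the canonical usage for this repo.
# "your-org/your-indictrans-repo" is a placeholder repo id.
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

repo_id = "your-org/your-indictrans-repo"

# trust_remote_code=True lets transformers import configuration_indictrans.py,
# modeling_indictrans.py and tokenization_indictrans.py shipped in the repo.
tokenizer = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)
model = AutoModelForSeq2SeqLM.from_pretrained(repo_id, trust_remote_code=True)

# Plain text is used purely as an illustration; the custom tokenizer may expect
# language-tagged input, so see tokenization_indictrans.py for the real format.
inputs = tokenizer(["Hello, how are you?"], return_tensors="pt")
generated = model.generate(**inputs, num_beams=5, max_new_tokens=64)
print(tokenizer.batch_decode(generated, skip_special_tokens=True))
```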
.gitattributes
CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33  *.zip filter=lfs diff=lfs merge=lfs -text
34  *.zst filter=lfs diff=lfs merge=lfs -text
35  *tfevents* filter=lfs diff=lfs merge=lfs -text
36 +model.SRC filter=lfs diff=lfs merge=lfs -text
37 +model.TGT filter=lfs diff=lfs merge=lfs -text
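The two added rules mark model.SRC and model.TGT as Git LFS-tracked binaries, alongside the patterns already present. For reference, a commit like the "Upload 13 files" one above can be produced with huggingface_hub; the local folder and repo id below are placeholders, not values taken from this page:

```python
# Sketch: uploading a local export as a single commit with huggingface_hub.
# Both the folder path and the repo id are placeholders.
from huggingface_hub import HfApi

api = HfApi()
api.upload_folder(
    folder_path="./indictrans_export",        # local folder holding the 13 files
    repo_id="your-org/your-indictrans-repo",  # placeholder repo id
    commit_message="Upload 13 files",
)
```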
added_tokens.json
ADDED
@@ -0,0 +1,3983 @@
[3,983 added lines of token-to-id mappings, e.g. "$": 126670, "▁Blei": 122777, "▁Jisu": 122997, "▁aidkam": 125269]
|
1573 |
+
"▁ailad": 123308,
|
1574 |
+
"▁aineh": 124173,
|
1575 |
+
"▁ainguh": 123616,
|
1576 |
+
"▁aiom": 124808,
|
1577 |
+
"▁aireng": 125586,
|
1578 |
+
"▁ais": 124897,
|
1579 |
+
"▁aishah": 125834,
|
1580 |
+
"▁aiñ": 123243,
|
1581 |
+
"▁aka": 124032,
|
1582 |
+
"▁akar": 126205,
|
1583 |
+
"▁akor": 123954,
|
1584 |
+
"▁akren": 124667,
|
1585 |
+
"▁aks": 125397,
|
1586 |
+
"▁aksiden": 126093,
|
1587 |
+
"▁akynduh": 124490,
|
1588 |
+
"▁alam": 123076,
|
1589 |
+
"▁alap": 124841,
|
1590 |
+
"▁ale": 126445,
|
1591 |
+
"▁aleh": 123183,
|
1592 |
+
"▁aleit": 125445,
|
1593 |
+
"▁altar": 126520,
|
1594 |
+
"▁alumlang": 125074,
|
1595 |
+
"▁ambulance": 126616,
|
1596 |
+
"▁amendment": 125175,
|
1597 |
+
"▁amit": 126293,
|
1598 |
+
"▁ammon": 126006,
|
1599 |
+
"▁ampareen": 126470,
|
1600 |
+
"▁anc": 126151,
|
1601 |
+
"▁ancest": 126576,
|
1602 |
+
"▁angel": 123415,
|
1603 |
+
"▁angnud": 125190,
|
1604 |
+
"▁ano": 123871,
|
1605 |
+
"▁ans": 124817,
|
1606 |
+
"▁answered": 125726,
|
1607 |
+
"▁antad": 124975,
|
1608 |
+
"▁aplai": 126384,
|
1609 |
+
"▁apom": 124888,
|
1610 |
+
"▁apostol": 124467,
|
1611 |
+
"▁apot": 126078,
|
1612 |
+
"▁arap": 123393,
|
1613 |
+
"▁ardent": 124533,
|
1614 |
+
"▁arliang": 124323,
|
1615 |
+
"▁arngut": 123642,
|
1616 |
+
"▁aroh": 123636,
|
1617 |
+
"▁arpa": 125825,
|
1618 |
+
"▁arphew": 123654,
|
1619 |
+
"▁arsien": 126292,
|
1620 |
+
"▁arspah": 125512,
|
1621 |
+
"▁artad": 124867,
|
1622 |
+
"▁artat": 126025,
|
1623 |
+
"▁artatien": 124908,
|
1624 |
+
"▁artylli": 124406,
|
1625 |
+
"▁asaid": 126610,
|
1626 |
+
"▁ashong": 126290,
|
1627 |
+
"▁asoh": 126023,
|
1628 |
+
"▁assam": 123228,
|
1629 |
+
"▁assyria": 125232,
|
1630 |
+
"▁asyllok": 126395,
|
1631 |
+
"▁atanga": 125828,
|
1632 |
+
"▁ateh": 126108,
|
1633 |
+
"▁athiah": 126309,
|
1634 |
+
"▁athuh": 123000,
|
1635 |
+
"▁athuhlypa": 123448,
|
1636 |
+
"▁atiar": 124254,
|
1637 |
+
"▁autonomous": 124616,
|
1638 |
+
"▁avângin": 126137,
|
1639 |
+
"▁awan": 126283,
|
1640 |
+
"▁awria": 125607,
|
1641 |
+
"▁aï": 124849,
|
1642 |
+
"▁aïu": 125255,
|
1643 |
+
"▁baal": 125986,
|
1644 |
+
"▁babha": 123681,
|
1645 |
+
"▁babil": 123953,
|
1646 |
+
"▁babilon": 125676,
|
1647 |
+
"▁babilonia": 124485,
|
1648 |
+
"▁babu": 125627,
|
1649 |
+
"▁babun": 124987,
|
1650 |
+
"▁badon": 125090,
|
1651 |
+
"▁badonbor": 124132,
|
1652 |
+
"▁baduk": 125338,
|
1653 |
+
"▁baha": 124270,
|
1654 |
+
"▁bahaing": 126090,
|
1655 |
+
"▁baheh": 125835,
|
1656 |
+
"▁baibl": 126418,
|
1657 |
+
"▁bainong": 124345,
|
1658 |
+
"▁baj": 125281,
|
1659 |
+
"▁baje": 122999,
|
1660 |
+
"▁baka": 125148,
|
1661 |
+
"▁bakh": 123452,
|
1662 |
+
"▁bakhraw": 123637,
|
1663 |
+
"▁bakhuid": 123482,
|
1664 |
+
"▁bakyntang": 124894,
|
1665 |
+
"▁balai": 126581,
|
1666 |
+
"▁balang": 123137,
|
1667 |
+
"▁bamsap": 123844,
|
1668 |
+
"▁banbein": 126076,
|
1669 |
+
"▁baneh": 125644,
|
1670 |
+
"▁bangladesh": 124048,
|
1671 |
+
"▁banse": 124361,
|
1672 |
+
"▁banyngkong": 125478,
|
1673 |
+
"▁bap": 125080,
|
1674 |
+
"▁bapaw": 126503,
|
1675 |
+
"▁bapher": 122848,
|
1676 |
+
"▁barabor": 126582,
|
1677 |
+
"▁barim": 123141,
|
1678 |
+
"▁barit": 124875,
|
1679 |
+
"▁baroh": 122713,
|
1680 |
+
"▁basa": 125431,
|
1681 |
+
"▁basah": 124017,
|
1682 |
+
"▁basaiawmoit": 124869,
|
1683 |
+
"▁basan": 126408,
|
1684 |
+
"▁basha": 123984,
|
1685 |
+
"▁bashatei": 126365,
|
1686 |
+
"▁bashisha": 125295,
|
1687 |
+
"▁bashyrkhei": 126199,
|
1688 |
+
"▁basniew": 125017,
|
1689 |
+
"▁batbor": 126110,
|
1690 |
+
"▁bathymmai": 124304,
|
1691 |
+
"▁bawk": 124680,
|
1692 |
+
"▁beain": 123522,
|
1693 |
+
"▁beaiñ": 124776,
|
1694 |
+
"▁beijot": 125797,
|
1695 |
+
"▁beit": 122890,
|
1696 |
+
"▁beiñ": 125332,
|
1697 |
+
"▁benjamin": 124971,
|
1698 |
+
"▁beth": 125102,
|
1699 |
+
"▁bhabriew": 125345,
|
1700 |
+
"▁bhah": 123584,
|
1701 |
+
"▁bhalang": 124255,
|
1702 |
+
"▁bhoi": 123440,
|
1703 |
+
"▁biang": 122763,
|
1704 |
+
"▁bieit": 123758,
|
1705 |
+
"▁biej": 125710,
|
1706 |
+
"▁bitar": 123524,
|
1707 |
+
"▁bjp": 123347,
|
1708 |
+
"▁blang": 124446,
|
1709 |
+
"▁blei": 122755,
|
1710 |
+
"▁bleit": 123687,
|
1711 |
+
"▁bleithaw": 123689,
|
1712 |
+
"▁bn": 122772,
|
1713 |
+
"▁bna": 125367,
|
1714 |
+
"▁bnai": 122818,
|
1715 |
+
"▁bneng": 123051,
|
1716 |
+
"▁bniah": 123075,
|
1717 |
+
"▁bniat": 125208,
|
1718 |
+
"▁borabor": 123774,
|
1719 |
+
"▁borbah": 125735,
|
1720 |
+
"▁bording": 123355,
|
1721 |
+
"▁brai": 125916,
|
1722 |
+
"▁bran": 124631,
|
1723 |
+
"▁briew": 122711,
|
1724 |
+
"▁bs": 123620,
|
1725 |
+
"▁bsa": 124820,
|
1726 |
+
"▁bsein": 125731,
|
1727 |
+
"▁bsf": 124917,
|
1728 |
+
"▁bsuh": 126102,
|
1729 |
+
"▁btap": 126539,
|
1730 |
+
"▁bteng": 123118,
|
1731 |
+
"▁bthah": 123220,
|
1732 |
+
"▁bthei": 126530,
|
1733 |
+
"▁buaid": 125023,
|
1734 |
+
"▁buddien": 124128,
|
1735 |
+
"▁buhai": 125709,
|
1736 |
+
"▁buhrieh": 123791,
|
1737 |
+
"▁bujli": 124158,
|
1738 |
+
"▁bunkam": 125450,
|
1739 |
+
"▁bunsien": 123697,
|
1740 |
+
"▁burom": 122881,
|
1741 |
+
"▁bylla": 123588,
|
1742 |
+
"▁bymdei": 124915,
|
1743 |
+
"▁bymdon": 126526,
|
1744 |
+
"▁bymhok": 126478,
|
1745 |
+
"▁bymjukut": 124091,
|
1746 |
+
"▁bymkhuid": 123991,
|
1747 |
+
"▁bymm": 125722,
|
1748 |
+
"▁bymman": 123480,
|
1749 |
+
"▁bymmyntoi": 125747,
|
1750 |
+
"▁bymngeit": 126163,
|
1751 |
+
"▁byn": 126273,
|
1752 |
+
"▁byndi": 124877,
|
1753 |
+
"▁bynrap": 123115,
|
1754 |
+
"▁bynriew": 123420,
|
1755 |
+
"▁bynshet": 125866,
|
1756 |
+
"▁bynta": 122718,
|
1757 |
+
"▁byrngem": 123718,
|
1758 |
+
"▁byrni": 125392,
|
1759 |
+
"▁byrnihat": 126113,
|
1760 |
+
"▁byrthen": 126652,
|
1761 |
+
"▁cab": 124944,
|
1762 |
+
"▁cem": 123509,
|
1763 |
+
"▁cement": 125787,
|
1764 |
+
"▁chhuah": 125934,
|
1765 |
+
"▁chhuak": 124813,
|
1766 |
+
"▁chhung": 126585,
|
1767 |
+
"▁chhûng": 126156,
|
1768 |
+
"▁chungah": 125759,
|
1769 |
+
"▁chyne": 126336,
|
1770 |
+
"▁clan": 125776,
|
1771 |
+
"▁conrad": 123864,
|
1772 |
+
"▁cour": 126248,
|
1773 |
+
"▁cs": 125766,
|
1774 |
+
"▁cswo": 126280,
|
1775 |
+
"▁dab": 126269,
|
1776 |
+
"▁dabid": 123272,
|
1777 |
+
"▁dain": 125751,
|
1778 |
+
"▁dait": 125943,
|
1779 |
+
"▁daka": 124507,
|
1780 |
+
"▁dalade": 124560,
|
1781 |
+
"▁daniel": 124879,
|
1782 |
+
"▁dara": 123704,
|
1783 |
+
"▁david": 125584,
|
1784 |
+
"▁dawai": 123237,
|
1785 |
+
"▁dawki": 126241,
|
1786 |
+
"▁dc": 123883,
|
1787 |
+
"▁delhi": 123957,
|
1788 |
+
"▁descend": 125870,
|
1789 |
+
"▁descendants": 126471,
|
1790 |
+
"▁destr": 125594,
|
1791 |
+
"▁dewbilat": 124242,
|
1792 |
+
"▁dewiong": 123034,
|
1793 |
+
"▁dhar": 125948,
|
1794 |
+
"▁diaw": 125994,
|
1795 |
+
"▁dieng": 122880,
|
1796 |
+
"▁diengduh": 125519,
|
1797 |
+
"▁diengphna": 125112,
|
1798 |
+
"▁diengpyn": 126554,
|
1799 |
+
"▁distrik": 124654,
|
1800 |
+
"▁dkhar": 123560,
|
1801 |
+
"▁dkhot": 122863,
|
1802 |
+
"▁dkoh": 126274,
|
1803 |
+
"▁dohkha": 123477,
|
1804 |
+
"▁dohnud": 123325,
|
1805 |
+
"▁donbok": 125098,
|
1806 |
+
"▁donbor": 123811,
|
1807 |
+
"▁donburom": 125051,
|
1808 |
+
"▁donkam": 122843,
|
1809 |
+
"▁donkti": 123487,
|
1810 |
+
"▁donkupar": 125633,
|
1811 |
+
"▁donlang": 124117,
|
1812 |
+
"▁dork": 123951,
|
1813 |
+
"▁dorkhas": 124245,
|
1814 |
+
"▁dp": 124926,
|
1815 |
+
"▁dpei": 125283,
|
1816 |
+
"▁drai": 126609,
|
1817 |
+
"▁drm": 125393,
|
1818 |
+
"▁drmukul": 125427,
|
1819 |
+
"▁drok": 123916,
|
1820 |
+
"▁dud": 124530,
|
1821 |
+
"▁duitara": 125977,
|
1822 |
+
"▁dujok": 126058,
|
1823 |
+
"▁dukan": 123021,
|
1824 |
+
"▁dulir": 125432,
|
1825 |
+
"▁duma": 126644,
|
1826 |
+
"▁duna": 123203,
|
1827 |
+
"▁duriaw": 123368,
|
1828 |
+
"▁durkhmat": 125477,
|
1829 |
+
"▁dusmon": 125119,
|
1830 |
+
"▁dustur": 123716,
|
1831 |
+
"▁duwai": 123310,
|
1832 |
+
"▁duwan": 123360,
|
1833 |
+
"▁dwar": 126294,
|
1834 |
+
"▁dâwn": 125715,
|
1835 |
+
"▁ear": 124671,
|
1836 |
+
"▁eat": 125274,
|
1837 |
+
"▁edom": 125161,
|
1838 |
+
"▁eg": 125816,
|
1839 |
+
"▁ehrang": 125456,
|
1840 |
+
"▁eiei": 122914,
|
1841 |
+
"▁eit": 125438,
|
1842 |
+
"▁ejahar": 124414,
|
1843 |
+
"▁eksam": 125742,
|
1844 |
+
"▁eksamin": 123705,
|
1845 |
+
"▁elaka": 124916,
|
1846 |
+
"▁elek": 122894,
|
1847 |
+
"▁elekshon": 122910,
|
1848 |
+
"▁elijah": 125831,
|
1849 |
+
"▁elisha": 125404,
|
1850 |
+
"▁emaw": 125184,
|
1851 |
+
"▁enem": 126488,
|
1852 |
+
"▁engineer": 125348,
|
1853 |
+
"▁eph": 124611,
|
1854 |
+
"▁ephraim": 125686,
|
1855 |
+
"▁eriong": 125298,
|
1856 |
+
"▁evil": 126300,
|
1857 |
+
"▁ew": 125428,
|
1858 |
+
"▁fapa": 125342,
|
1859 |
+
"▁fc": 125520,
|
1860 |
+
"▁federation": 126012,
|
1861 |
+
"▁fkjgp": 124468,
|
1862 |
+
"▁forum": 126281,
|
1863 |
+
"▁gad": 126172,
|
1864 |
+
"▁gali": 126323,
|
1865 |
+
"▁gandhi": 126377,
|
1866 |
+
"▁garo": 123231,
|
1867 |
+
"▁ger": 126642,
|
1868 |
+
"▁ghadc": 126190,
|
1869 |
+
"▁gilead": 126140,
|
1870 |
+
"▁gn": 125027,
|
1871 |
+
"▁gnla": 125375,
|
1872 |
+
"▁gospel": 124833,
|
1873 |
+
"▁grace": 126604,
|
1874 |
+
"▁guwahati": 125310,
|
1875 |
+
"▁hadien": 122753,
|
1876 |
+
"▁haduh": 122737,
|
1877 |
+
"▁haei": 125056,
|
1878 |
+
"▁haiing": 124558,
|
1879 |
+
"▁hajan": 123188,
|
1880 |
+
"▁hajar": 123009,
|
1881 |
+
"▁hajir": 125142,
|
1882 |
+
"▁haka": 122829,
|
1883 |
+
"▁hakhmat": 123550,
|
1884 |
+
"▁hala": 125394,
|
1885 |
+
"▁halor": 122735,
|
1886 |
+
"▁hamar": 126227,
|
1887 |
+
"▁haneng": 124521,
|
1888 |
+
"▁hangne": 123028,
|
1889 |
+
"▁hangno": 123800,
|
1890 |
+
"▁hangta": 122967,
|
1891 |
+
"▁hangtei": 124529,
|
1892 |
+
"▁hangto": 126613,
|
1893 |
+
"▁hanm": 126109,
|
1894 |
+
"▁hano": 125210,
|
1895 |
+
"▁hapdeng": 122849,
|
1896 |
+
"▁hapoh": 122742,
|
1897 |
+
"▁harud": 123842,
|
1898 |
+
"▁harum": 123891,
|
1899 |
+
"▁hasem": 124260,
|
1900 |
+
"▁hati": 126257,
|
1901 |
+
"▁hato": 123060,
|
1902 |
+
"▁hdr": 126440,
|
1903 |
+
"▁heaven": 125789,
|
1904 |
+
"▁henshad": 126184,
|
1905 |
+
"▁hep": 124359,
|
1906 |
+
"▁heprit": 125382,
|
1907 |
+
"▁hese": 125901,
|
1908 |
+
"▁hesekai": 126623,
|
1909 |
+
"▁hian": 123387,
|
1910 |
+
"▁hiar": 122992,
|
1911 |
+
"▁hiarpateng": 124444,
|
1912 |
+
"▁hiarthma": 126577,
|
1913 |
+
"▁hikai": 123097,
|
1914 |
+
"▁hindu": 126065,
|
1915 |
+
"▁histori": 125128,
|
1916 |
+
"▁hma": 124701,
|
1917 |
+
"▁hman": 125899,
|
1918 |
+
"▁hmang": 126465,
|
1919 |
+
"▁hming": 125877,
|
1920 |
+
"▁hmu": 125307,
|
1921 |
+
"▁hmuh": 125962,
|
1922 |
+
"▁hmun": 124218,
|
1923 |
+
"▁hnam": 126251,
|
1924 |
+
"▁hnlc": 124801,
|
1925 |
+
"▁hnyf": 125744,
|
1926 |
+
"▁hnê": 123809,
|
1927 |
+
"▁hnênah": 124105,
|
1928 |
+
"▁hooid": 125302,
|
1929 |
+
"▁hrang": 125411,
|
1930 |
+
"▁hria": 126629,
|
1931 |
+
"▁hriat": 124847,
|
1932 |
+
"▁hril": 125601,
|
1933 |
+
"▁hrilh": 126104,
|
1934 |
+
"▁hruai": 126449,
|
1935 |
+
"▁hs": 126250,
|
1936 |
+
"▁hsp": 123831,
|
1937 |
+
"▁hspdp": 123840,
|
1938 |
+
"▁hukum": 122835,
|
1939 |
+
"▁hura": 125263,
|
1940 |
+
"▁huri": 126128,
|
1941 |
+
"▁husiar": 125279,
|
1942 |
+
"▁hyc": 124393,
|
1943 |
+
"▁hyn": 123057,
|
1944 |
+
"▁hyndai": 124556,
|
1945 |
+
"▁hynmen": 123619,
|
1946 |
+
"▁hynn": 122903,
|
1947 |
+
"▁hynne": 123713,
|
1948 |
+
"▁hynniew": 124085,
|
1949 |
+
"▁hynniewtrep": 124374,
|
1950 |
+
"▁hynnin": 123059,
|
1951 |
+
"▁hynrei": 122717,
|
1952 |
+
"▁hynriew": 123319,
|
1953 |
+
"▁hynriewphew": 125668,
|
1954 |
+
"▁hynñiew": 123821,
|
1955 |
+
"▁iabit": 124266,
|
1956 |
+
"▁iada": 123128,
|
1957 |
+
"▁iadei": 122953,
|
1958 |
+
"▁iadon": 123798,
|
1959 |
+
"▁iadonlang": 125276,
|
1960 |
+
"▁iahap": 124127,
|
1961 |
+
"▁iaid": 122824,
|
1962 |
+
"▁iaineh": 126403,
|
1963 |
+
"▁iaiong": 124567,
|
1964 |
+
"▁iajan": 125568,
|
1965 |
+
"▁iak": 122795,
|
1966 |
+
"▁iaka": 122788,
|
1967 |
+
"▁iakane": 126315,
|
1968 |
+
"▁iakhaid": 125653,
|
1969 |
+
"▁iakhih": 124332,
|
1970 |
+
"▁iakhun": 123117,
|
1971 |
+
"▁iaki": 122936,
|
1972 |
+
"▁iaknieh": 126149,
|
1973 |
+
"▁iakren": 122957,
|
1974 |
+
"▁iakynduh": 123172,
|
1975 |
+
"▁iala": 124985,
|
1976 |
+
"▁ialade": 123186,
|
1977 |
+
"▁ialam": 123027,
|
1978 |
+
"▁ialang": 125206,
|
1979 |
+
"▁ialap": 123975,
|
1980 |
+
"▁ialeh": 123156,
|
1981 |
+
"▁ialehkai": 123196,
|
1982 |
+
"▁ialum": 126657,
|
1983 |
+
"▁iamih": 126518,
|
1984 |
+
"▁ian": 126127,
|
1985 |
+
"▁iang": 124677,
|
1986 |
+
"▁ianga": 126571,
|
1987 |
+
"▁iangi": 126661,
|
1988 |
+
"▁iapher": 125081,
|
1989 |
+
"▁iaphi": 125757,
|
1990 |
+
"▁iar": 124236,
|
1991 |
+
"▁iarap": 123047,
|
1992 |
+
"▁iaroh": 124199,
|
1993 |
+
"▁iasaid": 125995,
|
1994 |
+
"▁iashim": 123519,
|
1995 |
+
"▁iashimbynta": 125344,
|
1996 |
+
"▁iashong": 125253,
|
1997 |
+
"▁iasnoh": 125937,
|
1998 |
+
"▁iasoh": 124803,
|
1999 |
+
"▁iasoi": 126565,
|
2000 |
+
"▁iasyllok": 126645,
|
2001 |
+
"▁iatai": 124660,
|
2002 |
+
"▁iateh": 126337,
|
2003 |
+
"▁iathoh": 126572,
|
2004 |
+
"▁iathuh": 122766,
|
2005 |
+
"▁iatrei": 123889,
|
2006 |
+
"▁iatreilang": 124172,
|
2007 |
+
"▁iaw": 125266,
|
2008 |
+
"▁ibein": 126528,
|
2009 |
+
"▁ieh": 123024,
|
2010 |
+
"▁iehnoh": 123179,
|
2011 |
+
"▁ieid": 124062,
|
2012 |
+
"▁ieit": 123143,
|
2013 |
+
"▁ieng": 123019,
|
2014 |
+
"▁iewduh": 124687,
|
2015 |
+
"▁iewphew": 126633,
|
2016 |
+
"▁iing": 122790,
|
2017 |
+
"▁iingbishar": 125860,
|
2018 |
+
"▁iingmane": 126000,
|
2019 |
+
"▁iit": 125855,
|
2020 |
+
"▁ijipt": 123433,
|
2021 |
+
"▁ijli": 125329,
|
2022 |
+
"▁iktiar": 125069,
|
2023 |
+
"▁ilekshon": 124187,
|
2024 |
+
"▁ilektrik": 125353,
|
2025 |
+
"▁ilp": 123836,
|
2026 |
+
"▁imat": 124289,
|
2027 |
+
"▁imlang": 123397,
|
2028 |
+
"▁indian": 126466,
|
2029 |
+
"▁ingbishar": 126077,
|
2030 |
+
"▁ingjain": 123914,
|
2031 |
+
"▁ingsyiem": 124729,
|
2032 |
+
"▁injek": 125194,
|
2033 |
+
"▁inner": 126121,
|
2034 |
+
"▁ioh": 122715,
|
2035 |
+
"▁iohi": 122874,
|
2036 |
+
"▁iohlad": 126252,
|
2037 |
+
"▁iohlum": 126080,
|
2038 |
+
"▁iohsngew": 123680,
|
2039 |
+
"▁iong": 123925,
|
2040 |
+
"▁isaak": 125574,
|
2041 |
+
"▁ishu": 124986,
|
2042 |
+
"▁isih": 124059,
|
2043 |
+
"▁iso": 126171,
|
2044 |
+
"▁israel": 122905,
|
2045 |
+
"▁isynei": 124423,
|
2046 |
+
"▁ithuh": 123189,
|
2047 |
+
"▁itynnad": 124206,
|
2048 |
+
"▁itynnat": 124263,
|
2049 |
+
"▁iuh": 123638,
|
2050 |
+
"▁iwei": 123127,
|
2051 |
+
"▁jabieng": 125733,
|
2052 |
+
"▁jaboh": 124100,
|
2053 |
+
"▁jac": 125317,
|
2054 |
+
"▁jaiaw": 125807,
|
2055 |
+
"▁jaidbynriew": 123439,
|
2056 |
+
"▁jaintia": 123168,
|
2057 |
+
"▁jaitbynriew": 122919,
|
2058 |
+
"▁jaiñ": 123702,
|
2059 |
+
"▁jaiñkup": 126366,
|
2060 |
+
"▁jaka": 122726,
|
2061 |
+
"▁jakarieh": 126068,
|
2062 |
+
"▁jakhlia": 124905,
|
2063 |
+
"▁jakob": 123906,
|
2064 |
+
"▁james": 124922,
|
2065 |
+
"▁jamin": 125538,
|
2066 |
+
"▁janai": 124557,
|
2067 |
+
"▁janmiet": 123246,
|
2068 |
+
"▁jeh": 125646,
|
2069 |
+
"▁jeho": 125106,
|
2070 |
+
"▁jentil": 125234,
|
2071 |
+
"▁jer": 123606,
|
2072 |
+
"▁jerem": 125691,
|
2073 |
+
"▁jeroboam": 126304,
|
2074 |
+
"▁jerusalem": 123226,
|
2075 |
+
"▁jes": 124703,
|
2076 |
+
"▁jhadc": 124579,
|
2077 |
+
"▁jhur": 124214,
|
2078 |
+
"▁jin": 123905,
|
2079 |
+
"▁jinga": 123193,
|
2080 |
+
"▁jingai": 122955,
|
2081 |
+
"▁jingaiei": 124572,
|
2082 |
+
"▁jingainguh": 123199,
|
2083 |
+
"▁jingang": 124597,
|
2084 |
+
"▁jingangnud": 124644,
|
2085 |
+
"▁jingartatien": 126217,
|
2086 |
+
"▁jingb": 122778,
|
2087 |
+
"▁jingba": 123529,
|
2088 |
+
"▁jingbah": 126461,
|
2089 |
+
"▁jingbakla": 124213,
|
2090 |
+
"▁jingbam": 123100,
|
2091 |
+
"▁jingbatai": 124464,
|
2092 |
+
"▁jingbha": 123943,
|
2093 |
+
"▁jingbiang": 126259,
|
2094 |
+
"▁jingbishar": 123467,
|
2095 |
+
"▁jingbit": 123590,
|
2096 |
+
"▁jingbitar": 123520,
|
2097 |
+
"▁jingbthah": 123624,
|
2098 |
+
"▁jingbuh": 123714,
|
2099 |
+
"▁jingbuhbeit": 125921,
|
2100 |
+
"▁jingbun": 124009,
|
2101 |
+
"▁jingbym": 123025,
|
2102 |
+
"▁jingbymman": 125808,
|
2103 |
+
"▁jingdap": 124501,
|
2104 |
+
"▁jingdawa": 123159,
|
2105 |
+
"▁jingdeng": 125222,
|
2106 |
+
"▁jingdheng": 125318,
|
2107 |
+
"▁jingdie": 124570,
|
2108 |
+
"▁jingdih": 124402,
|
2109 |
+
"▁jingdkoh": 126499,
|
2110 |
+
"▁jingdon": 122956,
|
2111 |
+
"▁jingdonkam": 123201,
|
2112 |
+
"▁jingdonlang": 125383,
|
2113 |
+
"▁jingduh": 123626,
|
2114 |
+
"▁jingduhnong": 126430,
|
2115 |
+
"▁jingduk": 125580,
|
2116 |
+
"▁jingdum": 124108,
|
2117 |
+
"▁jingduna": 123471,
|
2118 |
+
"▁jingduwai": 123450,
|
2119 |
+
"▁jingeh": 122884,
|
2120 |
+
"▁jingem": 124183,
|
2121 |
+
"▁jinghap": 124758,
|
2122 |
+
"▁jingheh": 125243,
|
2123 |
+
"▁jinghiar": 124588,
|
2124 |
+
"▁jinghikai": 123067,
|
2125 |
+
"▁jingi": 122733,
|
2126 |
+
"▁jingia": 122723,
|
2127 |
+
"▁jingiada": 124101,
|
2128 |
+
"▁jingiadei": 123663,
|
2129 |
+
"▁jingiadon": 124111,
|
2130 |
+
"▁jingiadonlang": 125062,
|
2131 |
+
"▁jingiai": 125019,
|
2132 |
+
"▁jingiaid": 123456,
|
2133 |
+
"▁jingiak": 122856,
|
2134 |
+
"▁jingiakhih": 123223,
|
2135 |
+
"▁jingiakhun": 123221,
|
2136 |
+
"▁jingiakop": 124369,
|
2137 |
+
"▁jingiakren": 123169,
|
2138 |
+
"▁jingiakynduh": 123515,
|
2139 |
+
"▁jingialam": 123409,
|
2140 |
+
"▁jingialang": 122808,
|
2141 |
+
"▁jingialap": 125467,
|
2142 |
+
"▁jingialeh": 124256,
|
2143 |
+
"▁jingialehkai": 123242,
|
2144 |
+
"▁jingiap": 123356,
|
2145 |
+
"▁jingiapher": 125248,
|
2146 |
+
"▁jingiarap": 123174,
|
2147 |
+
"▁jingiaroh": 124496,
|
2148 |
+
"▁jingias": 123444,
|
2149 |
+
"▁jingiaseng": 124112,
|
2150 |
+
"▁jingiat": 123119,
|
2151 |
+
"▁jingiatai": 126158,
|
2152 |
+
"▁jingiateh": 124885,
|
2153 |
+
"▁jingiathuh": 123881,
|
2154 |
+
"▁jingiatrei": 126191,
|
2155 |
+
"▁jingiatreilang": 123768,
|
2156 |
+
"▁jingiatylli": 125809,
|
2157 |
+
"▁jingieid": 125588,
|
2158 |
+
"▁jingieit": 123373,
|
2159 |
+
"▁jingim": 122912,
|
2160 |
+
"▁jingioh": 123321,
|
2161 |
+
"▁jingiohi": 125570,
|
2162 |
+
"▁jingis": 124094,
|
2163 |
+
"▁jingisynei": 124363,
|
2164 |
+
"▁jingit": 124946,
|
2165 |
+
"▁jingithuh": 125066,
|
2166 |
+
"▁jingiwbih": 124244,
|
2167 |
+
"▁jingj": 122764,
|
2168 |
+
"▁jingjah": 124659,
|
2169 |
+
"▁jingjia": 122862,
|
2170 |
+
"▁jingjied": 124679,
|
2171 |
+
"▁jingjngai": 125724,
|
2172 |
+
"▁jingjop": 123247,
|
2173 |
+
"▁jingjot": 124076,
|
2174 |
+
"▁jingjrong": 125771,
|
2175 |
+
"▁jingjulor": 125303,
|
2176 |
+
"▁jingjur": 124185,
|
2177 |
+
"▁jingjurip": 125110,
|
2178 |
+
"▁jingjynjar": 123612,
|
2179 |
+
"▁jingk": 122716,
|
2180 |
+
"▁jingkah": 126349,
|
2181 |
+
"▁jingkam": 124555,
|
2182 |
+
"▁jingkem": 126557,
|
2183 |
+
"▁jingker": 124713,
|
2184 |
+
"▁jingkh": 122973,
|
2185 |
+
"▁jingkha": 123627,
|
2186 |
+
"▁jingkhai": 126574,
|
2187 |
+
"▁jingkhang": 123212,
|
2188 |
+
"▁jingkhawai": 123786,
|
2189 |
+
"▁jingkhein": 123601,
|
2190 |
+
"▁jingkheiñ": 125024,
|
2191 |
+
"▁jingkhia": 126266,
|
2192 |
+
"▁jingkhiah": 125223,
|
2193 |
+
"▁jingkhih": 125905,
|
2194 |
+
"▁jingkhlad": 124473,
|
2195 |
+
"▁jingkhlain": 125178,
|
2196 |
+
"▁jingkhmih": 125854,
|
2197 |
+
"▁jingkhot": 124226,
|
2198 |
+
"▁jingkhraw": 125443,
|
2199 |
+
"▁jingkhu": 124443,
|
2200 |
+
"▁jingkhublei": 125415,
|
2201 |
+
"▁jingkhuid": 125031,
|
2202 |
+
"▁jingkhuslai": 126136,
|
2203 |
+
"▁jingkhynra": 124336,
|
2204 |
+
"▁jingkieng": 123475,
|
2205 |
+
"▁jingkiew": 123936,
|
2206 |
+
"▁jingkit": 123400,
|
2207 |
+
"▁jingkitkhlieh": 123830,
|
2208 |
+
"▁jingkmen": 123431,
|
2209 |
+
"▁jingkn": 123491,
|
2210 |
+
"▁jingknia": 125846,
|
2211 |
+
"▁jingkoit": 123859,
|
2212 |
+
"▁jingkren": 122960,
|
2213 |
+
"▁jingktah": 124845,
|
2214 |
+
"▁jingkular": 123608,
|
2215 |
+
"▁jingkulmar": 124844,
|
2216 |
+
"▁jingkut": 125297,
|
2217 |
+
"▁jingkwah": 123684,
|
2218 |
+
"▁jingkylla": 123552,
|
2219 |
+
"▁jingkylli": 123200,
|
2220 |
+
"▁jingkyllon": 126563,
|
2221 |
+
"▁jingkyn": 124319,
|
2222 |
+
"▁jingkynmaw": 124846,
|
2223 |
+
"▁jingkynnoh": 123428,
|
2224 |
+
"▁jingkynshew": 125530,
|
2225 |
+
"▁jingkynt": 124711,
|
2226 |
+
"▁jingkynthoh": 124979,
|
2227 |
+
"▁jingkyntiew": 126551,
|
2228 |
+
"▁jingkyntu": 126634,
|
2229 |
+
"▁jingkyr": 122923,
|
2230 |
+
"▁jingkyrduh": 125116,
|
2231 |
+
"▁jingkyrkhu": 123700,
|
2232 |
+
"▁jingkyrmen": 123395,
|
2233 |
+
"▁jingkyrpad": 124191,
|
2234 |
+
"▁jingkyrshan": 123381,
|
2235 |
+
"▁jingkñia": 124070,
|
2236 |
+
"▁jingl": 123425,
|
2237 |
+
"▁jinglah": 124276,
|
2238 |
+
"▁jinglait": 123833,
|
2239 |
+
"▁jinglaitluid": 124564,
|
2240 |
+
"▁jinglam": 125352,
|
2241 |
+
"▁jinglap": 125078,
|
2242 |
+
"▁jingleh": 122899,
|
2243 |
+
"▁jinglehniam": 123915,
|
2244 |
+
"▁jinglehrain": 125465,
|
2245 |
+
"▁jingleit": 123175,
|
2246 |
+
"▁jinglong": 122948,
|
2247 |
+
"▁jinglongmraw": 126246,
|
2248 |
+
"▁jinglum": 125716,
|
2249 |
+
"▁jinglut": 123935,
|
2250 |
+
"▁jingm": 122757,
|
2251 |
+
"▁jingma": 123167,
|
2252 |
+
"▁jingmaham": 124549,
|
2253 |
+
"▁jingman": 123817,
|
2254 |
+
"▁jingmane": 124309,
|
2255 |
+
"▁jingmang": 125805,
|
2256 |
+
"▁jingmih": 123389,
|
2257 |
+
"▁jingmihpat": 125507,
|
2258 |
+
"▁jingmudui": 126196,
|
2259 |
+
"▁jingmut": 122837,
|
2260 |
+
"▁jingmynjur": 123976,
|
2261 |
+
"▁jingmynsaw": 124909,
|
2262 |
+
"▁jingmyntoi": 123222,
|
2263 |
+
"▁jingnang": 124338,
|
2264 |
+
"▁jingngeit": 123252,
|
2265 |
+
"▁jingnoh": 125911,
|
2266 |
+
"▁jingong": 123579,
|
2267 |
+
"▁jingot": 125167,
|
2268 |
+
"▁jingp": 122725,
|
2269 |
+
"▁jingpa": 125541,
|
2270 |
+
"▁jingpan": 125593,
|
2271 |
+
"▁jingpang": 122891,
|
2272 |
+
"▁jingpdiang": 126396,
|
2273 |
+
"▁jingpeit": 124146,
|
2274 |
+
"▁jingph": 123893,
|
2275 |
+
"▁jingphah": 126532,
|
2276 |
+
"▁jingphohsniew": 124670,
|
2277 |
+
"▁jingplie": 124866,
|
2278 |
+
"▁jingpule": 123804,
|
2279 |
+
"▁jingpur": 125959,
|
2280 |
+
"▁jingpyllait": 125560,
|
2281 |
+
"▁jingpyn": 122765,
|
2282 |
+
"▁jingpynb": 123369,
|
2283 |
+
"▁jingpynbeit": 124967,
|
2284 |
+
"▁jingpynbna": 123664,
|
2285 |
+
"▁jingpynd": 124800,
|
2286 |
+
"▁jingpyndonkam": 124617,
|
2287 |
+
"▁jingpyni": 124350,
|
2288 |
+
"▁jingpyniaid": 125134,
|
2289 |
+
"▁jingpynim": 125127,
|
2290 |
+
"▁jingpynjot": 124209,
|
2291 |
+
"▁jingpynk": 123743,
|
2292 |
+
"▁jingpynkh": 123853,
|
2293 |
+
"▁jingpynkhreh": 124395,
|
2294 |
+
"▁jingpynkhuid": 124900,
|
2295 |
+
"▁jingpynkup": 125909,
|
2296 |
+
"▁jingpynkylla": 124641,
|
2297 |
+
"▁jingpynlait": 126472,
|
2298 |
+
"▁jingpynlong": 124542,
|
2299 |
+
"▁jingpynlut": 125887,
|
2300 |
+
"▁jingpynmih": 124595,
|
2301 |
+
"▁jingpynpaw": 125522,
|
2302 |
+
"▁jingpynr": 124543,
|
2303 |
+
"▁jingpynrung": 125378,
|
2304 |
+
"▁jingpyns": 124603,
|
2305 |
+
"▁jingpynshai": 124081,
|
2306 |
+
"▁jingpynshitom": 124180,
|
2307 |
+
"▁jingpynslem": 125531,
|
2308 |
+
"▁jingpynt": 123335,
|
2309 |
+
"▁jingpyntip": 124539,
|
2310 |
+
"▁jingpyntrei": 123837,
|
2311 |
+
"▁jingpyntreikam": 124589,
|
2312 |
+
"▁jingpyr": 123065,
|
2313 |
+
"▁jingpyrkhat": 123649,
|
2314 |
+
"▁jingpyrshah": 123599,
|
2315 |
+
"▁jingpyrshang": 124480,
|
2316 |
+
"▁jingr": 123016,
|
2317 |
+
"▁jingrakhe": 123648,
|
2318 |
+
"▁jingri": 123562,
|
2319 |
+
"▁jingriam": 125459,
|
2320 |
+
"▁jingrung": 125625,
|
2321 |
+
"▁jingrwai": 123239,
|
2322 |
+
"▁jings": 122730,
|
2323 |
+
"▁jingsah": 124741,
|
2324 |
+
"▁jingsam": 124738,
|
2325 |
+
"▁jingsaphriang": 125474,
|
2326 |
+
"▁jingsarong": 125683,
|
2327 |
+
"▁jingsawa": 124838,
|
2328 |
+
"▁jingsdang": 124340,
|
2329 |
+
"▁jingsh": 123390,
|
2330 |
+
"▁jingsha": 123339,
|
2331 |
+
"▁jingshad": 124513,
|
2332 |
+
"▁jingshah": 122926,
|
2333 |
+
"▁jingshai": 123334,
|
2334 |
+
"▁jingshakri": 123724,
|
2335 |
+
"▁jingshaniah": 124445,
|
2336 |
+
"▁jingshem": 124090,
|
2337 |
+
"▁jingshemphang": 125025,
|
2338 |
+
"▁jingsheptieng": 124520,
|
2339 |
+
"▁jingshi": 122932,
|
2340 |
+
"▁jingshim": 123899,
|
2341 |
+
"▁jingshisha": 123033,
|
2342 |
+
"▁jingshit": 125654,
|
2343 |
+
"▁jingshitom": 124154,
|
2344 |
+
"▁jingshlei": 124936,
|
2345 |
+
"▁jingshlur": 124961,
|
2346 |
+
"▁jingshna": 124273,
|
2347 |
+
"▁jingshngain": 124819,
|
2348 |
+
"▁jingshong": 123941,
|
2349 |
+
"▁jingsiew": 123855,
|
2350 |
+
"▁jingslem": 126656,
|
2351 |
+
"▁jingsmai": 126071,
|
2352 |
+
"▁jingsneng": 125950,
|
2353 |
+
"▁jingsngew": 122887,
|
2354 |
+
"▁jingsngewbha": 125628,
|
2355 |
+
"▁jingsngewkhia": 124110,
|
2356 |
+
"▁jingsngewlem": 126235,
|
2357 |
+
"▁jingsngewnguh": 124635,
|
2358 |
+
"▁jingsngewsih": 123860,
|
2359 |
+
"▁jingsngewthuh": 124647,
|
2360 |
+
"▁jingsniew": 123789,
|
2361 |
+
"▁jingstad": 123323,
|
2362 |
+
"▁jingsuba": 125280,
|
2363 |
+
"▁jingsuk": 123525,
|
2364 |
+
"▁jingsumar": 123625,
|
2365 |
+
"▁jingsynshar": 123659,
|
2366 |
+
"▁jingt": 122783,
|
2367 |
+
"▁jingtap": 126141,
|
2368 |
+
"▁jingtbit": 125537,
|
2369 |
+
"▁jingtei": 123695,
|
2370 |
+
"▁jingtem": 125559,
|
2371 |
+
"▁jingtep": 123633,
|
2372 |
+
"▁jingth": 122922,
|
2373 |
+
"▁jingthang": 126271,
|
2374 |
+
"▁jingthaw": 123869,
|
2375 |
+
"▁jingthep": 125618,
|
2376 |
+
"▁jingthew": 125219,
|
2377 |
+
"▁jingthiah": 124494,
|
2378 |
+
"▁jingthied": 126619,
|
2379 |
+
"▁jingthmu": 122951,
|
2380 |
+
"▁jingthoh": 123006,
|
2381 |
+
"▁jingthung": 123478,
|
2382 |
+
"▁jingthut": 125490,
|
2383 |
+
"▁jingtieng": 125007,
|
2384 |
+
"▁jingtih": 124346,
|
2385 |
+
"▁jingtim": 124503,
|
2386 |
+
"▁jingtip": 122873,
|
2387 |
+
"▁jingtlot": 126245,
|
2388 |
+
"▁jingtohkit": 123370,
|
2389 |
+
"▁jingtrei": 123110,
|
2390 |
+
"▁jingtreikam": 124122,
|
2391 |
+
"▁jingtwa": 125811,
|
2392 |
+
"▁jingtynjuh": 126404,
|
2393 |
+
"▁jingtyrwa": 124638,
|
2394 |
+
"▁jingud": 125361,
|
2395 |
+
"▁jingujor": 123460,
|
2396 |
+
"▁jingwad": 124997,
|
2397 |
+
"▁jingwan": 123038,
|
2398 |
+
"▁jingwanrah": 126144,
|
2399 |
+
"▁jingï": 122876,
|
2400 |
+
"▁jingïa": 123693,
|
2401 |
+
"▁jingïaid": 126306,
|
2402 |
+
"▁jingïak": 123696,
|
2403 |
+
"▁jingïakhun": 124786,
|
2404 |
+
"▁jingïalang": 124021,
|
2405 |
+
"▁jingïalehkai": 124657,
|
2406 |
+
"▁jingïap": 124177,
|
2407 |
+
"▁jingïarap": 124525,
|
2408 |
+
"▁jingïas": 125603,
|
2409 |
+
"▁jingïat": 125041,
|
2410 |
+
"▁jingïoh": 125604,
|
2411 |
+
"▁jingïohi": 126307,
|
2412 |
+
"▁jirang": 125622,
|
2413 |
+
"▁jisu": 122942,
|
2414 |
+
"▁jiw": 123715,
|
2415 |
+
"▁jng": 122964,
|
2416 |
+
"▁jngai": 123271,
|
2417 |
+
"▁jngi": 126361,
|
2418 |
+
"▁jngoh": 123274,
|
2419 |
+
"▁jngohkai": 124082,
|
2420 |
+
"▁joab": 125468,
|
2421 |
+
"▁john": 125028,
|
2422 |
+
"▁joit": 124488,
|
2423 |
+
"▁jon": 124566,
|
2424 |
+
"▁jonathan": 125983,
|
2425 |
+
"▁jongka": 123173,
|
2426 |
+
"▁jongki": 123138,
|
2427 |
+
"▁jongnga": 125635,
|
2428 |
+
"▁jongngi": 124142,
|
2429 |
+
"▁jongno": 124411,
|
2430 |
+
"▁jongphi": 125029,
|
2431 |
+
"▁jongu": 123213,
|
2432 |
+
"▁jor": 126360,
|
2433 |
+
"▁jordan": 124724,
|
2434 |
+
"▁jos": 123462,
|
2435 |
+
"▁joseph": 124261,
|
2436 |
+
"▁joshua": 124378,
|
2437 |
+
"▁jot": 124086,
|
2438 |
+
"▁jowai": 123986,
|
2439 |
+
"▁jrong": 123688,
|
2440 |
+
"▁jubab": 122869,
|
2441 |
+
"▁jubor": 123442,
|
2442 |
+
"▁judah": 123327,
|
2443 |
+
"▁julor": 125315,
|
2444 |
+
"▁jumai": 125117,
|
2445 |
+
"▁junom": 123455,
|
2446 |
+
"▁jurip": 123346,
|
2447 |
+
"▁jus": 125414,
|
2448 |
+
"▁jutang": 123483,
|
2449 |
+
"▁juti": 125728,
|
2450 |
+
"▁jyll": 125408,
|
2451 |
+
"▁jylla": 122720,
|
2452 |
+
"▁jyllei": 126085,
|
2453 |
+
"▁jylliew": 123669,
|
2454 |
+
"▁jyllop": 125971,
|
2455 |
+
"▁jymmang": 124286,
|
2456 |
+
"▁jynd": 126314,
|
2457 |
+
"▁jynjar": 124074,
|
2458 |
+
"▁jynt": 126431,
|
2459 |
+
"▁jyr": 124735,
|
2460 |
+
"▁jyrngam": 125366,
|
2461 |
+
"▁jyrwa": 126475,
|
2462 |
+
"▁kaban": 123542,
|
2463 |
+
"▁kabu": 123502,
|
2464 |
+
"▁kada": 124204,
|
2465 |
+
"▁kadei": 123240,
|
2466 |
+
"▁kadiang": 126056,
|
2467 |
+
"▁kaei": 122775,
|
2468 |
+
"▁kaiph": 123602,
|
2469 |
+
"▁kaiphod": 123897,
|
2470 |
+
"▁kaiphot": 125185,
|
2471 |
+
"▁kajih": 124818,
|
2472 |
+
"▁kajing": 124279,
|
2473 |
+
"▁kajuh": 122980,
|
2474 |
+
"▁kaliang": 125480,
|
2475 |
+
"▁kalong": 124440,
|
2476 |
+
"▁kamai": 123082,
|
2477 |
+
"▁kambah": 124196,
|
2478 |
+
"▁kamon": 124502,
|
2479 |
+
"▁kamra": 123312,
|
2480 |
+
"▁kamram": 123672,
|
2481 |
+
"▁kana": 124932,
|
2482 |
+
"▁kanaan": 125173,
|
2483 |
+
"▁kandid": 124516,
|
2484 |
+
"▁kandidet": 124584,
|
2485 |
+
"▁kapor": 124830,
|
2486 |
+
"▁karkhana": 123481,
|
2487 |
+
"▁karta": 124322,
|
2488 |
+
"▁kashari": 123852,
|
2489 |
+
"▁katba": 122805,
|
2490 |
+
"▁katdei": 125516,
|
2491 |
+
"▁katei": 122710,
|
2492 |
+
"▁katjuh": 126460,
|
2493 |
+
"▁katkum": 122904,
|
2494 |
+
"▁katne": 122893,
|
2495 |
+
"▁katno": 123073,
|
2496 |
+
"▁kato": 123778,
|
2497 |
+
"▁katto": 122928,
|
2498 |
+
"▁kawang": 124674,
|
2499 |
+
"▁kawei": 122734,
|
2500 |
+
"▁kawng": 126220,
|
2501 |
+
"▁kba": 124787,
|
2502 |
+
"▁kdar": 124622,
|
2503 |
+
"▁kdew": 123015,
|
2504 |
+
"▁kdup": 126070,
|
2505 |
+
"▁keiñ": 125491,
|
2506 |
+
"▁kerkut": 125093,
|
2507 |
+
"▁kew": 123629,
|
2508 |
+
"▁khadar": 124692,
|
2509 |
+
"▁khadc": 123229,
|
2510 |
+
"▁khadduh": 123987,
|
2511 |
+
"▁khaii": 123505,
|
2512 |
+
"▁khait": 125923,
|
2513 |
+
"▁khajna": 123449,
|
2514 |
+
"▁khamtam": 122924,
|
2515 |
+
"▁khanatang": 126260,
|
2516 |
+
"▁khanglad": 124153,
|
2517 |
+
"▁khap": 123956,
|
2518 |
+
"▁khappud": 123259,
|
2519 |
+
"▁kharkhana": 125682,
|
2520 |
+
"▁kharkongor": 125729,
|
2521 |
+
"▁kharshiing": 125554,
|
2522 |
+
"▁khatduh": 123454,
|
2523 |
+
"▁khaw": 123192,
|
2524 |
+
"▁khaïi": 126139,
|
2525 |
+
"▁khein": 123404,
|
2526 |
+
"▁kheit": 126166,
|
2527 |
+
"▁kheiñ": 124519,
|
2528 |
+
"▁khep": 125140,
|
2529 |
+
"▁khet": 123977,
|
2530 |
+
"▁khiah": 123125,
|
2531 |
+
"▁khie": 123344,
|
2532 |
+
"▁khiew": 123963,
|
2533 |
+
"▁khih": 123751,
|
2534 |
+
"▁khim": 125657,
|
2535 |
+
"▁khla": 126261,
|
2536 |
+
"▁khlad": 123151,
|
2537 |
+
"▁khlain": 123463,
|
2538 |
+
"▁khlaiñ": 124303,
|
2539 |
+
"▁khlam": 123072,
|
2540 |
+
"▁khlaw": 123108,
|
2541 |
+
"▁khleh": 124077,
|
2542 |
+
"▁khlei": 126346,
|
2543 |
+
"▁khlein": 126579,
|
2544 |
+
"▁khlem": 122785,
|
2545 |
+
"▁khlieh": 122966,
|
2546 |
+
"▁khliehduh": 126560,
|
2547 |
+
"▁khliehriat": 125665,
|
2548 |
+
"▁khlong": 126066,
|
2549 |
+
"▁khluit": 126060,
|
2550 |
+
"▁khlur": 124330,
|
2551 |
+
"▁khmat": 122840,
|
2552 |
+
"▁khmih": 122996,
|
2553 |
+
"▁khmut": 125515,
|
2554 |
+
"▁khn": 122791,
|
2555 |
+
"▁khnam": 123473,
|
2556 |
+
"▁khnang": 122839,
|
2557 |
+
"▁khniang": 123913,
|
2558 |
+
"▁khoh": 124054,
|
2559 |
+
"▁khohsiew": 125859,
|
2560 |
+
"▁khoit": 125045,
|
2561 |
+
"▁khong": 123641,
|
2562 |
+
"▁khongpong": 125823,
|
2563 |
+
"▁khop": 126482,
|
2564 |
+
"▁khr": 122820,
|
2565 |
+
"▁khraw": 122985,
|
2566 |
+
"▁khreh": 124043,
|
2567 |
+
"▁khriat": 126535,
|
2568 |
+
"▁khring": 124596,
|
2569 |
+
"▁khrist": 123140,
|
2570 |
+
"▁khristan": 123761,
|
2571 |
+
"▁khrong": 125323,
|
2572 |
+
"▁khrup": 126627,
|
2573 |
+
"▁khua": 125730,
|
2574 |
+
"▁khublei": 123326,
|
2575 |
+
"▁khubor": 122774,
|
2576 |
+
"▁khuid": 123204,
|
2577 |
+
"▁khum": 125700,
|
2578 |
+
"▁khun": 122736,
|
2579 |
+
"▁khunbynriew": 126519,
|
2580 |
+
"▁khunlangbrot": 124604,
|
2581 |
+
"▁khunlung": 124130,
|
2582 |
+
"▁khunswet": 125160,
|
2583 |
+
"▁khuri": 125180,
|
2584 |
+
"▁khuslai": 124420,
|
2585 |
+
"▁khusnam": 123547,
|
2586 |
+
"▁khwai": 125616,
|
2587 |
+
"▁khyll": 125238,
|
2588 |
+
"▁khyllah": 125990,
|
2589 |
+
"▁khyllem": 124798,
|
2590 |
+
"▁khyllipmat": 125991,
|
2591 |
+
"▁khyllung": 124047,
|
2592 |
+
"▁khyn": 124753,
|
2593 |
+
"▁khynd": 122847,
|
2594 |
+
"▁khyndai": 123752,
|
2595 |
+
"▁khyndew": 122993,
|
2596 |
+
"▁khyndiat": 123113,
|
2597 |
+
"▁khynn": 122823,
|
2598 |
+
"▁khynnah": 122830,
|
2599 |
+
"▁khynra": 123922,
|
2600 |
+
"▁khyr": 124080,
|
2601 |
+
"▁khyrd": 123233,
|
2602 |
+
"▁khyrdep": 125985,
|
2603 |
+
"▁khyrdop": 123287,
|
2604 |
+
"▁khyrwit": 126389,
|
2605 |
+
"▁kiar": 123847,
|
2606 |
+
"▁kiba": 122706,
|
2607 |
+
"▁kiban": 124903,
|
2608 |
+
"▁kidei": 123911,
|
2609 |
+
"▁kiei": 122822,
|
2610 |
+
"▁kieng": 124726,
|
2611 |
+
"▁kiew": 123046,
|
2612 |
+
"▁kijuh": 125565,
|
2613 |
+
"▁kilom": 124663,
|
2614 |
+
"▁kilomitar": 126310,
|
2615 |
+
"▁kitab": 123777,
|
2616 |
+
"▁kitbok": 124854,
|
2617 |
+
"▁kitei": 122779,
|
2618 |
+
"▁kitkhlieh": 125226,
|
2619 |
+
"▁kito": 122828,
|
2620 |
+
"▁kitrwiang": 126099,
|
2621 |
+
"▁kiwei": 122738,
|
2622 |
+
"▁kj": 126195,
|
2623 |
+
"▁kjat": 123261,
|
2624 |
+
"▁kl": 122850,
|
2625 |
+
"▁klang": 126203,
|
2626 |
+
"▁klet": 123508,
|
2627 |
+
"▁kli": 124952,
|
2628 |
+
"▁kliar": 124489,
|
2629 |
+
"▁klim": 126118,
|
2630 |
+
"▁klob": 126333,
|
2631 |
+
"▁kloi": 123161,
|
2632 |
+
"▁klong": 126476,
|
2633 |
+
"▁klur": 123066,
|
2634 |
+
"▁kmie": 122970,
|
2635 |
+
"▁kner": 125217,
|
2636 |
+
"▁knia": 125555,
|
2637 |
+
"▁knieh": 123813,
|
2638 |
+
"▁kno": 123929,
|
2639 |
+
"▁kob": 125122,
|
2640 |
+
"▁kohnguh": 123135,
|
2641 |
+
"▁koidi": 124384,
|
2642 |
+
"▁koit": 123052,
|
2643 |
+
"▁kolej": 124324,
|
2644 |
+
"▁kolshor": 124960,
|
2645 |
+
"▁komiti": 123924,
|
2646 |
+
"▁komp": 123336,
|
2647 |
+
"▁kompani": 125760,
|
2648 |
+
"▁kompeni": 123526,
|
2649 |
+
"▁kongsan": 123145,
|
2650 |
+
"▁konsil": 126267,
|
2651 |
+
"▁konstitwensi": 123218,
|
2652 |
+
"▁kontrak": 124252,
|
2653 |
+
"▁kontraktor": 124824,
|
2654 |
+
"▁kop": 124665,
|
2655 |
+
"▁kopi": 126320,
|
2656 |
+
"▁kordor": 124650,
|
2657 |
+
"▁kotkhubor": 124188,
|
2658 |
+
"▁kp": 123254,
|
2659 |
+
"▁kper": 124493,
|
2660 |
+
"▁kpoh": 124317,
|
2661 |
+
"▁kpu": 123690,
|
2662 |
+
"▁kput": 125547,
|
2663 |
+
"▁krad": 124069,
|
2664 |
+
"▁krem": 123880,
|
2665 |
+
"▁kren": 122743,
|
2666 |
+
"▁ks": 122878,
|
2667 |
+
"▁ksai": 124759,
|
2668 |
+
"▁ksew": 124837,
|
2669 |
+
"▁kshaid": 126553,
|
2670 |
+
"▁ksiar": 123235,
|
2671 |
+
"▁ksiew": 123262,
|
2672 |
+
"▁ksing": 126432,
|
2673 |
+
"▁ksu": 123384,
|
2674 |
+
"▁ksuid": 123995,
|
2675 |
+
"▁kt": 122853,
|
2676 |
+
"▁ktah": 122937,
|
2677 |
+
"▁ktem": 124060,
|
2678 |
+
"▁kthang": 125096,
|
2679 |
+
"▁kthong": 125976,
|
2680 |
+
"▁kti": 122913,
|
2681 |
+
"▁ktieh": 125469,
|
2682 |
+
"▁ktien": 122892,
|
2683 |
+
"▁kub": 124789,
|
2684 |
+
"▁kubur": 124958,
|
2685 |
+
"▁kud": 126438,
|
2686 |
+
"▁kulai": 123727,
|
2687 |
+
"▁kular": 123031,
|
2688 |
+
"▁kuli": 124571,
|
2689 |
+
"▁kulmar": 124883,
|
2690 |
+
"▁kumar": 125754,
|
2691 |
+
"▁kumba": 122756,
|
2692 |
+
"▁kumjuh": 122925,
|
2693 |
+
"▁kumne": 123256,
|
2694 |
+
"▁kumno": 122864,
|
2695 |
+
"▁kumta": 122771,
|
2696 |
+
"▁kumto": 125632,
|
2697 |
+
"▁kumwei": 124396,
|
2698 |
+
"▁kuna": 124408,
|
2699 |
+
"▁kup": 123711,
|
2700 |
+
"▁kupar": 125655,
|
2701 |
+
"▁kurim": 123934,
|
2702 |
+
"▁kurup": 123351,
|
2703 |
+
"▁kwah": 122813,
|
2704 |
+
"▁kwai": 124498,
|
2705 |
+
"▁ky": 123363,
|
2706 |
+
"▁kyang": 124929,
|
2707 |
+
"▁kyiad": 123776,
|
2708 |
+
"▁kyll": 122961,
|
2709 |
+
"▁kylla": 122897,
|
2710 |
+
"▁kyllain": 125695,
|
2711 |
+
"▁kyllalyngkot": 124766,
|
2712 |
+
"▁kylleng": 123058,
|
2713 |
+
"▁kylli": 122915,
|
2714 |
+
"▁kylliang": 123998,
|
2715 |
+
"▁kyllon": 124030,
|
2716 |
+
"▁kylluid": 126004,
|
2717 |
+
"▁kyllum": 125299,
|
2718 |
+
"▁kynbat": 125669,
|
2719 |
+
"▁kynd": 124901,
|
2720 |
+
"▁kyndit": 123609,
|
2721 |
+
"▁kyndon": 123001,
|
2722 |
+
"▁kyndong": 124523,
|
2723 |
+
"▁kynduh": 123011,
|
2724 |
+
"▁kynhun": 122801,
|
2725 |
+
"▁kynj": 124046,
|
2726 |
+
"▁kynja": 123379,
|
2727 |
+
"▁kynjah": 124853,
|
2728 |
+
"▁kynjat": 125462,
|
2729 |
+
"▁kynjoh": 124301,
|
2730 |
+
"▁kynjri": 125268,
|
2731 |
+
"▁kynmaw": 123162,
|
2732 |
+
"▁kynnoh": 123002,
|
2733 |
+
"▁kynr": 122931,
|
2734 |
+
"▁kynrad": 123472,
|
2735 |
+
"▁kynram": 124351,
|
2736 |
+
"▁kynran": 125768,
|
2737 |
+
"▁kynriah": 124476,
|
2738 |
+
"▁kynroh": 123388,
|
2739 |
+
"▁kynrum": 124438,
|
2740 |
+
"▁kyns": 125460,
|
2741 |
+
"▁kynsai": 125046,
|
2742 |
+
"▁kynsan": 123496,
|
2743 |
+
"▁kynsh": 126178,
|
2744 |
+
"▁kynsha": 126484,
|
2745 |
+
"▁kynshew": 125681,
|
2746 |
+
"▁kynt": 122731,
|
2747 |
+
"▁kynta": 123348,
|
2748 |
+
"▁kyntait": 123101,
|
2749 |
+
"▁kyntang": 123796,
|
2750 |
+
"▁kynthah": 125987,
|
2751 |
+
"▁kynthei": 122868,
|
2752 |
+
"▁kynthih": 124056,
|
2753 |
+
"▁kynthoh": 123827,
|
2754 |
+
"▁kynthup": 122833,
|
2755 |
+
"▁kynti": 123486,
|
2756 |
+
"▁kyntien": 123255,
|
2757 |
+
"▁kyntiew": 123165,
|
2758 |
+
"▁kynting": 126497,
|
2759 |
+
"▁kynton": 125384,
|
2760 |
+
"▁kyntu": 122952,
|
2761 |
+
"▁kyntur": 126549,
|
2762 |
+
"▁kyrdan": 123112,
|
2763 |
+
"▁kyrdoh": 126409,
|
2764 |
+
"▁kyrduh": 124601,
|
2765 |
+
"▁kyrhai": 124278,
|
2766 |
+
"▁kyrkhu": 123523,
|
2767 |
+
"▁kyrkieh": 123646,
|
2768 |
+
"▁kyrmen": 123748,
|
2769 |
+
"▁kyrp": 122861,
|
2770 |
+
"▁kyrpad": 123041,
|
2771 |
+
"▁kyrpang": 123014,
|
2772 |
+
"▁kyrshan": 123111,
|
2773 |
+
"▁kyrt": 125235,
|
2774 |
+
"▁kyrteng": 122802,
|
2775 |
+
"▁kyrthep": 125886,
|
2776 |
+
"▁kyrtong": 122988,
|
2777 |
+
"▁kyrwoh": 123069,
|
2778 |
+
"▁kñia": 125376,
|
2779 |
+
"▁laban": 124815,
|
2780 |
+
"▁lada": 122751,
|
2781 |
+
"▁lade": 122933,
|
2782 |
+
"▁ladep": 123682,
|
2783 |
+
"▁lahduh": 124020,
|
2784 |
+
"▁laip": 123945,
|
2785 |
+
"▁laiphew": 123958,
|
2786 |
+
"▁lais": 124055,
|
2787 |
+
"▁laisen": 124614,
|
2788 |
+
"▁laitluid": 123694,
|
2789 |
+
"▁laitumkhrah": 126390,
|
2790 |
+
"▁lajong": 123518,
|
2791 |
+
"▁lakam": 124816,
|
2792 |
+
"▁laloo": 125843,
|
2793 |
+
"▁lamkhmat": 126354,
|
2794 |
+
"▁lamler": 124050,
|
2795 |
+
"▁lammihngi": 126631,
|
2796 |
+
"▁lamphang": 124630,
|
2797 |
+
"▁langbrot": 123358,
|
2798 |
+
"▁lano": 123723,
|
2799 |
+
"▁lanot": 124977,
|
2800 |
+
"▁lapang": 125077,
|
2801 |
+
"▁lashai": 123414,
|
2802 |
+
"▁latip": 126525,
|
2803 |
+
"▁lawei": 123795,
|
2804 |
+
"▁lber": 124285,
|
2805 |
+
"▁lebi": 123942,
|
2806 |
+
"▁lehbein": 125955,
|
2807 |
+
"▁lehkai": 126467,
|
2808 |
+
"▁lehkmen": 124512,
|
2809 |
+
"▁lehniam": 124155,
|
2810 |
+
"▁lehnoh": 123377,
|
2811 |
+
"▁lehrain": 126626,
|
2812 |
+
"▁lehse": 123729,
|
2813 |
+
"▁leilieh": 126597,
|
2814 |
+
"▁lep": 126428,
|
2815 |
+
"▁ler": 126630,
|
2816 |
+
"▁liangpyrshah": 125249,
|
2817 |
+
"▁lieh": 124382,
|
2818 |
+
"▁lieng": 123446,
|
2819 |
+
"▁liengsuin": 124827,
|
2820 |
+
"▁lilam": 126522,
|
2821 |
+
"▁linen": 125252,
|
2822 |
+
"▁lip": 126175,
|
2823 |
+
"▁litar": 125944,
|
2824 |
+
"▁liv": 124487,
|
2825 |
+
"▁longbriew": 124939,
|
2826 |
+
"▁longi": 123286,
|
2827 |
+
"▁longiing": 123721,
|
2828 |
+
"▁longing": 124023,
|
2829 |
+
"▁longkmie": 123225,
|
2830 |
+
"▁longsem": 125499,
|
2831 |
+
"▁longïing": 123901,
|
2832 |
+
"▁lov": 125441,
|
2833 |
+
"▁lumbah": 124398,
|
2834 |
+
"▁lumlang": 124983,
|
2835 |
+
"▁lumrit": 126523,
|
2836 |
+
"▁lur": 126373,
|
2837 |
+
"▁lute": 124064,
|
2838 |
+
"▁lyiur": 125885,
|
2839 |
+
"▁lym": 123533,
|
2840 |
+
"▁lymda": 124839,
|
2841 |
+
"▁lymne": 124547,
|
2842 |
+
"▁lympung": 123337,
|
2843 |
+
"▁lynd": 123819,
|
2844 |
+
"▁lyndet": 124004,
|
2845 |
+
"▁lyngba": 122800,
|
2846 |
+
"▁lyngdoh": 122902,
|
2847 |
+
"▁lyngk": 123577,
|
2848 |
+
"▁lyngkha": 123548,
|
2849 |
+
"▁lyngkhot": 123962,
|
2850 |
+
"▁lyngkhuh": 125296,
|
2851 |
+
"▁lyngkor": 126421,
|
2852 |
+
"▁lyngkot": 123839,
|
2853 |
+
"▁lyngngoh": 123733,
|
2854 |
+
"▁lyngraw": 125777,
|
2855 |
+
"▁lynn": 126464,
|
2856 |
+
"▁lynnong": 124970,
|
2857 |
+
"▁lynshop": 125769,
|
2858 |
+
"▁lynt": 122797,
|
2859 |
+
"▁lynter": 123666,
|
2860 |
+
"▁lynthem": 126510,
|
2861 |
+
"▁lynti": 122810,
|
2862 |
+
"▁lyoh": 124061,
|
2863 |
+
"▁lypa": 123248,
|
2864 |
+
"▁magistrate": 125764,
|
2865 |
+
"▁maha": 125679,
|
2866 |
+
"▁mairang": 125331,
|
2867 |
+
"▁malu": 125218,
|
2868 |
+
"▁mana": 125292,
|
2869 |
+
"▁manasseh": 125247,
|
2870 |
+
"▁manbha": 126038,
|
2871 |
+
"▁manbriew": 125935,
|
2872 |
+
"▁manipur": 126514,
|
2873 |
+
"▁manla": 123992,
|
2874 |
+
"▁marak": 124272,
|
2875 |
+
"▁maramot": 124441,
|
2876 |
+
"▁marbam": 124238,
|
2877 |
+
"▁marbaniang": 125879,
|
2878 |
+
"▁mardor": 123614,
|
2879 |
+
"▁mareh": 124006,
|
2880 |
+
"▁mariang": 123238,
|
2881 |
+
"▁marjan": 123782,
|
2882 |
+
"▁markhmat": 125388,
|
2883 |
+
"▁marwei": 123298,
|
2884 |
+
"▁marwein": 125945,
|
2885 |
+
"▁mashin": 125337,
|
2886 |
+
"▁mata": 125571,
|
2887 |
+
"▁matlah": 124540,
|
2888 |
+
"▁mawhati": 125922,
|
2889 |
+
"▁mawi": 126483,
|
2890 |
+
"▁mawk": 124602,
|
2891 |
+
"▁mawkhar": 126441,
|
2892 |
+
"▁mawkyrwat": 125895,
|
2893 |
+
"▁mawlai": 123779,
|
2894 |
+
"▁mawlong": 125436,
|
2895 |
+
"▁mawnongrim": 126416,
|
2896 |
+
"▁mawp": 125121,
|
2897 |
+
"▁mawphlang": 125795,
|
2898 |
+
"▁mawr": 125567,
|
2899 |
+
"▁maws": 123466,
|
2900 |
+
"▁mawshun": 124795,
|
2901 |
+
"▁mawsiang": 124725,
|
2902 |
+
"▁mawsynram": 125389,
|
2903 |
+
"▁mawthoh": 126033,
|
2904 |
+
"▁mb": 125815,
|
2905 |
+
"▁mda": 124763,
|
2906 |
+
"▁mdc": 123120,
|
2907 |
+
"▁meecl": 124907,
|
2908 |
+
"▁meg": 122962,
|
2909 |
+
"▁meghalaya": 122974,
|
2910 |
+
"▁meh": 126541,
|
2911 |
+
"▁mes": 124761,
|
2912 |
+
"▁messa": 125897,
|
2913 |
+
"▁meter": 125360,
|
2914 |
+
"▁meti": 126119,
|
2915 |
+
"▁metiap": 124129,
|
2916 |
+
"▁miat": 124391,
|
2917 |
+
"▁mie": 123004,
|
2918 |
+
"▁miej": 124262,
|
2919 |
+
"▁miet": 123045,
|
2920 |
+
"▁mihkhmat": 124339,
|
2921 |
+
"▁mihngi": 123258,
|
2922 |
+
"▁mihnoh": 123603,
|
2923 |
+
"▁mihpat": 125423,
|
2924 |
+
"▁mihpaw": 126270,
|
2925 |
+
"▁minot": 126135,
|
2926 |
+
"▁mitar": 123979,
|
2927 |
+
"▁miz": 125752,
|
2928 |
+
"▁mizoram": 126263,
|
2929 |
+
"▁ml": 124031,
|
2930 |
+
"▁mla": 123063,
|
2931 |
+
"▁mlien": 124910,
|
2932 |
+
"▁mluh": 125765,
|
2933 |
+
"▁moab": 124767,
|
2934 |
+
"▁modi": 124882,
|
2935 |
+
"▁moses": 123317,
|
2936 |
+
"▁motphran": 126500,
|
2937 |
+
"▁mpcc": 126586,
|
2938 |
+
"▁mr": 126188,
|
2939 |
+
"▁mradkhlaw": 124514,
|
2940 |
+
"▁mraw": 123469,
|
2941 |
+
"▁muda": 125671,
|
2942 |
+
"▁mudui": 125065,
|
2943 |
+
"▁mukotduma": 124222,
|
2944 |
+
"▁mukul": 123650,
|
2945 |
+
"▁muluk": 124478,
|
2946 |
+
"▁muslim": 126468,
|
2947 |
+
"▁mutdur": 125732,
|
2948 |
+
"▁mutlop": 124994,
|
2949 |
+
"▁mylliem": 123949,
|
2950 |
+
"▁myllung": 124386,
|
2951 |
+
"▁mynba": 123419,
|
2952 |
+
"▁mynbarim": 126399,
|
2953 |
+
"▁mynder": 123965,
|
2954 |
+
"▁mynh": 124250,
|
2955 |
+
"▁mynhynnin": 124861,
|
2956 |
+
"▁mynjur": 123331,
|
2957 |
+
"▁mynmiet": 123576,
|
2958 |
+
"▁mynn": 123885,
|
2959 |
+
"▁mynne": 126019,
|
2960 |
+
"▁mynno": 124651,
|
2961 |
+
"▁myns": 122792,
|
2962 |
+
"▁mynsaw": 123345,
|
2963 |
+
"▁mynshem": 125536,
|
2964 |
+
"▁mynshuwa": 123329,
|
2965 |
+
"▁mynshwa": 123386,
|
2966 |
+
"▁mynsiem": 122872,
|
2967 |
+
"▁mynsngi": 124241,
|
2968 |
+
"▁mynstep": 123181,
|
2969 |
+
"▁mynt": 124427,
|
2970 |
+
"▁mynta": 122719,
|
2971 |
+
"▁mynthi": 126167,
|
2972 |
+
"▁myntoi": 123838,
|
2973 |
+
"▁myntri": 122787,
|
2974 |
+
"▁myr": 125873,
|
2975 |
+
"▁nadien": 125277,
|
2976 |
+
"▁nadong": 125129,
|
2977 |
+
"▁naduh": 122780,
|
2978 |
+
"▁naei": 126654,
|
2979 |
+
"▁nailar": 124743,
|
2980 |
+
"▁nailur": 124727,
|
2981 |
+
"▁naitung": 124832,
|
2982 |
+
"▁naiwieng": 124812,
|
2983 |
+
"▁naki": 124953,
|
2984 |
+
"▁nala": 125572,
|
2985 |
+
"▁nalor": 123050,
|
2986 |
+
"▁namar": 122724,
|
2987 |
+
"▁namarba": 123504,
|
2988 |
+
"▁namarkata": 124135,
|
2989 |
+
"▁nangne": 123739,
|
2990 |
+
"▁nangno": 125704,
|
2991 |
+
"▁nangta": 122898,
|
2992 |
+
"▁napdeng": 123411,
|
2993 |
+
"▁naphang": 125200,
|
2994 |
+
"▁napoh": 123767,
|
2995 |
+
"▁narendra": 126117,
|
2996 |
+
"▁nb": 126052,
|
2997 |
+
"▁nc": 126600,
|
2998 |
+
"▁nebukhadnessar": 126353,
|
2999 |
+
"▁nehu": 125350,
|
3000 |
+
"▁neig": 126666,
|
3001 |
+
"▁neih": 125101,
|
3002 |
+
"▁nemsniew": 126268,
|
3003 |
+
"▁ngai": 124073,
|
3004 |
+
"▁ngaih": 126490,
|
3005 |
+
"▁ngam": 122987,
|
3006 |
+
"▁ngan": 122793,
|
3007 |
+
"▁ngap": 124627,
|
3008 |
+
"▁ngat": 123734,
|
3009 |
+
"▁nge": 124796,
|
3010 |
+
"▁ngei": 125562,
|
3011 |
+
"▁ngeit": 123116,
|
3012 |
+
"▁ngiah": 126122,
|
3013 |
+
"▁ngim": 123077,
|
3014 |
+
"▁ngon": 125892,
|
3015 |
+
"▁ngt": 125699,
|
3016 |
+
"▁nguh": 124362,
|
3017 |
+
"▁ngut": 122727,
|
3018 |
+
"▁niah": 124243,
|
3019 |
+
"▁niam": 122975,
|
3020 |
+
"▁niew": 123909,
|
3021 |
+
"▁niut": 124673,
|
3022 |
+
"▁nohphai": 124835,
|
3023 |
+
"▁nohprah": 124524,
|
3024 |
+
"▁nohsngi": 125940,
|
3025 |
+
"▁nombar": 124626,
|
3026 |
+
"▁nongai": 123812,
|
3027 |
+
"▁nongap": 123708,
|
3028 |
+
"▁nongaplangbrot": 126485,
|
3029 |
+
"▁nongb": 123205,
|
3030 |
+
"▁nongbah": 122857,
|
3031 |
+
"▁nongbishar": 124103,
|
3032 |
+
"▁nongbud": 124417,
|
3033 |
+
"▁nongbylla": 124136,
|
3034 |
+
"▁nongdie": 123645,
|
3035 |
+
"▁nonghikai": 122989,
|
3036 |
+
"▁nongi": 123102,
|
3037 |
+
"▁nongia": 122796,
|
3038 |
+
"▁nongialam": 122816,
|
3039 |
+
"▁nongialehkai": 124388,
|
3040 |
+
"▁nongiasaid": 125992,
|
3041 |
+
"▁nongiathuh": 125447,
|
3042 |
+
"▁nongiathuhlypa": 125746,
|
3043 |
+
"▁nongioh": 126559,
|
3044 |
+
"▁nongjop": 124930,
|
3045 |
+
"▁nongk": 123037,
|
3046 |
+
"▁nongkh": 125864,
|
3047 |
+
"▁nongkha": 123556,
|
3048 |
+
"▁nongkhaii": 123816,
|
3049 |
+
"▁nongkhlaw": 126388,
|
3050 |
+
"▁nongkit": 123365,
|
3051 |
+
"▁nongkitkam": 123507,
|
3052 |
+
"▁nongkrem": 125439,
|
3053 |
+
"▁nongkren": 124717,
|
3054 |
+
"▁nongkyndong": 123574,
|
3055 |
+
"▁nongkyrshan": 123872,
|
3056 |
+
"▁nonglam": 124491,
|
3057 |
+
"▁nonglamktien": 124607,
|
3058 |
+
"▁nongleh": 124865,
|
3059 |
+
"▁nongleit": 124124,
|
3060 |
+
"▁nonglum": 126392,
|
3061 |
+
"▁nonglute": 126658,
|
3062 |
+
"▁nongm": 125852,
|
3063 |
+
"▁nongmih": 123375,
|
3064 |
+
"▁nongmihkhmat": 123495,
|
3065 |
+
"▁nongniah": 123536,
|
3066 |
+
"▁nongp": 122971,
|
3067 |
+
"▁nongpang": 123591,
|
3068 |
+
"▁nongpeit": 124899,
|
3069 |
+
"▁nongpoh": 124040,
|
3070 |
+
"▁nongpop": 125762,
|
3071 |
+
"▁nongpule": 126157,
|
3072 |
+
"▁nongpyn": 123378,
|
3073 |
+
"▁nongpyniaid": 124431,
|
3074 |
+
"▁nongr": 122998,
|
3075 |
+
"▁nongrah": 125839,
|
3076 |
+
"▁nongrep": 123132,
|
3077 |
+
"▁nongri": 125982,
|
3078 |
+
"▁nongrim": 123056,
|
3079 |
+
"▁nongrit": 124857,
|
3080 |
+
"▁nongrum": 124341,
|
3081 |
+
"▁nongrwai": 125359,
|
3082 |
+
"▁nongs": 122877,
|
3083 |
+
"▁nongseng": 125677,
|
3084 |
+
"▁nongsh": 125002,
|
3085 |
+
"▁nongsha": 125291,
|
3086 |
+
"▁nongsharai": 125942,
|
3087 |
+
"▁nongshet": 126617,
|
3088 |
+
"▁nongshim": 125706,
|
3089 |
+
"▁nongshna": 126566,
|
3090 |
+
"▁nongshong": 122941,
|
3091 |
+
"▁nongshongshnong": 125978,
|
3092 |
+
"▁nongshun": 123211,
|
3093 |
+
"▁nongsiej": 126355,
|
3094 |
+
"▁nongstoin": 123769,
|
3095 |
+
"▁nongsumar": 125858,
|
3096 |
+
"▁nongsynshar": 123245,
|
3097 |
+
"▁nongt": 123422,
|
3098 |
+
"▁nongtem": 125968,
|
3099 |
+
"▁nongthaw": 124708,
|
3100 |
+
"▁nongthep": 124471,
|
3101 |
+
"▁nongthied": 125488,
|
3102 |
+
"▁nongthoh": 123005,
|
3103 |
+
"▁nongthohkhubor": 123541,
|
3104 |
+
"▁nongthymmai": 125966,
|
3105 |
+
"▁nongtrei": 122939,
|
3106 |
+
"▁nongtuh": 125123,
|
3107 |
+
"▁nongwan": 123728,
|
3108 |
+
"▁nongwei": 123506,
|
3109 |
+
"▁nongï": 123241,
|
3110 |
+
"▁nongïalam": 123611,
|
3111 |
+
"▁nongïathuhlypa": 124355,
|
3112 |
+
"▁npp": 123808,
|
3113 |
+
"▁nujor": 125454,
|
3114 |
+
"▁nuksa": 123640,
|
3115 |
+
"▁nurse": 125857,
|
3116 |
+
"▁nuti": 124962,
|
3117 |
+
"▁nylla": 126015,
|
3118 |
+
"▁nyngkong": 122906,
|
3119 |
+
"▁oannis": 124804,
|
3120 |
+
"▁obe": 126044,
|
3121 |
+
"▁oh": 122851,
|
3122 |
+
"▁ohi": 123012,
|
3123 |
+
"▁ohipaw": 125354,
|
3124 |
+
"▁ohkynti": 125689,
|
3125 |
+
"▁ohpdiang": 124569,
|
3126 |
+
"▁ohsngew": 123328,
|
3127 |
+
"▁oid": 125830,
|
3128 |
+
"▁olib": 125174,
|
3129 |
+
"▁ongu": 126325,
|
3130 |
+
"▁ophis": 122875,
|
3131 |
+
"▁ophisar": 123316,
|
3132 |
+
"▁otsher": 125591,
|
3133 |
+
"▁pad": 126570,
|
3134 |
+
"▁pahara": 123980,
|
3135 |
+
"▁pahuh": 126411,
|
3136 |
+
"▁paidbah": 122728,
|
3137 |
+
"▁paidlang": 125010,
|
3138 |
+
"▁painkhana": 126362,
|
3139 |
+
"▁paitbah": 123888,
|
3140 |
+
"▁paka": 125032,
|
3141 |
+
"▁pakhang": 123772,
|
3142 |
+
"▁pakhat": 125652,
|
3143 |
+
"▁pala": 125114,
|
3144 |
+
"▁palei": 125351,
|
3145 |
+
"▁pansngiat": 126435,
|
3146 |
+
"▁parabangeit": 123928,
|
3147 |
+
"▁parad": 125433,
|
3148 |
+
"▁paradoh": 126219,
|
3149 |
+
"▁paralok": 123194,
|
3150 |
+
"▁paramarjan": 125778,
|
3151 |
+
"▁paras": 126558,
|
3152 |
+
"▁parti": 125793,
|
3153 |
+
"▁pastor": 124315,
|
3154 |
+
"▁pateng": 123216,
|
3155 |
+
"▁pathai": 123282,
|
3156 |
+
"▁pathar": 124068,
|
3157 |
+
"▁patiaw": 125912,
|
3158 |
+
"▁patsha": 124314,
|
3159 |
+
"▁paul": 123583,
|
3160 |
+
"▁pawh": 123890,
|
3161 |
+
"▁pawkhmat": 126055,
|
3162 |
+
"▁pawnam": 124537,
|
3163 |
+
"▁pd": 123982,
|
3164 |
+
"▁pdeng": 122995,
|
3165 |
+
"▁pdf": 124380,
|
3166 |
+
"▁pdiang": 122908,
|
3167 |
+
"▁peit": 122814,
|
3168 |
+
"▁peitngor": 125166,
|
3169 |
+
"▁pep": 124442,
|
3170 |
+
"▁petr": 123604,
|
3171 |
+
"▁petros": 124618,
|
3172 |
+
"▁phadar": 126544,
|
3173 |
+
"▁phah": 122798,
|
3174 |
+
"▁phai": 122865,
|
3175 |
+
"▁phainal": 124643,
|
3176 |
+
"▁phang": 123236,
|
3177 |
+
"▁phara": 125998,
|
3178 |
+
"▁pharaoh": 126081,
|
3179 |
+
"▁phareng": 123988,
|
3180 |
+
"▁pharisi": 125647,
|
3181 |
+
"▁pharshi": 124769,
|
3182 |
+
"▁phasi": 125868,
|
3183 |
+
"▁phatok": 124320,
|
3184 |
+
"▁phc": 125761,
|
3185 |
+
"▁phew": 123443,
|
3186 |
+
"▁phiah": 124752,
|
3187 |
+
"▁phikir": 125587,
|
3188 |
+
"▁philistia": 124461,
|
3189 |
+
"▁phim": 123171,
|
3190 |
+
"▁phin": 122916,
|
3191 |
+
"▁phira": 124088,
|
3192 |
+
"▁phla": 124357,
|
3193 |
+
"▁phlang": 123835,
|
3194 |
+
"▁phlim": 123726,
|
3195 |
+
"▁phohsniew": 126401,
|
3196 |
+
"▁phom": 125429,
|
3197 |
+
"▁phong": 123621,
|
3198 |
+
"▁phra": 123731,
|
3199 |
+
"▁phria": 125379,
|
3200 |
+
"▁phuh": 125068,
|
3201 |
+
"▁phut": 123907,
|
3202 |
+
"▁phutbol": 124575,
|
3203 |
+
"▁phylla": 123305,
|
3204 |
+
"▁phyllaw": 123679,
|
3205 |
+
"▁phylliew": 126567,
|
3206 |
+
"▁phyrnai": 124942,
|
3207 |
+
"▁phyrng": 125047,
|
3208 |
+
"▁phyrngab": 125666,
|
3209 |
+
"▁plas": 124913,
|
3210 |
+
"▁pli": 126192,
|
3211 |
+
"▁pliang": 124288,
|
3212 |
+
"▁pluh": 125914,
|
3213 |
+
"▁pohiing": 124836,
|
3214 |
+
"▁pohing": 125804,
|
3215 |
+
"▁poim": 125741,
|
3216 |
+
"▁poitri": 126228,
|
3217 |
+
"▁poiwir": 126079,
|
3218 |
+
"▁polisi": 126054,
|
3219 |
+
"▁politik": 124405,
|
3220 |
+
"▁polo": 125504,
|
3221 |
+
"▁pom": 124376,
|
3222 |
+
"▁posit": 124192,
|
3223 |
+
"▁pra": 123832,
|
3224 |
+
"▁prad": 124904,
|
3225 |
+
"▁pradesh": 125284,
|
3226 |
+
"▁presbyterian": 125605,
|
3227 |
+
"▁prestone": 124302,
|
3228 |
+
"▁prie": 124455,
|
3229 |
+
"▁priests": 126160,
|
3230 |
+
"▁prob": 125486,
|
3231 |
+
"▁probins": 126152,
|
3232 |
+
"▁prod": 126075,
|
3233 |
+
"▁prog": 124358,
|
3234 |
+
"▁prokram": 123406,
|
3235 |
+
"▁prophet": 125717,
|
3236 |
+
"▁ps": 126005,
|
3237 |
+
"▁publicity": 126334,
|
3238 |
+
"▁puit": 125241,
|
3239 |
+
"▁puk": 124228,
|
3240 |
+
"▁pukri": 125549,
|
3241 |
+
"▁purew": 125996,
|
3242 |
+
"▁pus": 125969,
|
3243 |
+
"▁puson": 126450,
|
3244 |
+
"▁puta": 125637,
|
3245 |
+
"▁puthi": 123585,
|
3246 |
+
"▁pw": 124714,
|
3247 |
+
"▁pwd": 124794,
|
3248 |
+
"▁pyll": 124895,
|
3249 |
+
"▁pyllait": 122930,
|
3250 |
+
"▁pyllaitluid": 126107,
|
3251 |
+
"▁pylleng": 126181,
|
3252 |
+
"▁pylliem": 126550,
|
3253 |
+
"▁pyllong": 125169,
|
3254 |
+
"▁pyllun": 125535,
|
3255 |
+
"▁pynap": 125275,
|
3256 |
+
"▁pynb": 122782,
|
3257 |
+
"▁pynbam": 125626,
|
3258 |
+
"▁pynban": 123007,
|
3259 |
+
"▁pynbaptis": 125373,
|
3260 |
+
"▁pynbeit": 123086,
|
3261 |
+
"▁pynbha": 123517,
|
3262 |
+
"▁pynbiang": 123187,
|
3263 |
+
"▁pynbiej": 125656,
|
3264 |
+
"▁pynbit": 125458,
|
3265 |
+
"▁pynbitar": 125416,
|
3266 |
+
"▁pynbna": 122963,
|
3267 |
+
"▁pynbor": 123826,
|
3268 |
+
"▁pynbud": 125640,
|
3269 |
+
"▁pynbun": 125954,
|
3270 |
+
"▁pynd": 122841,
|
3271 |
+
"▁pyndait": 124781,
|
3272 |
+
"▁pyndam": 124029,
|
3273 |
+
"▁pyndap": 123423,
|
3274 |
+
"▁pyndem": 123814,
|
3275 |
+
"▁pyndep": 122947,
|
3276 |
+
"▁pyndik": 125513,
|
3277 |
+
"▁pyndon": 123895,
|
3278 |
+
"▁pyndonburom": 124851,
|
3279 |
+
"▁pyndonkam": 122938,
|
3280 |
+
"▁pynduh": 123277,
|
3281 |
+
"▁pynduna": 124508,
|
3282 |
+
"▁pyneh": 125381,
|
3283 |
+
"▁pyng": 124536,
|
3284 |
+
"▁pyngkiang": 126351,
|
3285 |
+
"▁pyngngad": 126276,
|
3286 |
+
"▁pynhap": 123771,
|
3287 |
+
"▁pynheh": 124606,
|
3288 |
+
"▁pynher": 124466,
|
3289 |
+
"▁pynhiar": 123749,
|
3290 |
+
"▁pynhun": 125424,
|
3291 |
+
"▁pyni": 122759,
|
3292 |
+
"▁pynia": 123301,
|
3293 |
+
"▁pyniaid": 122954,
|
3294 |
+
"▁pyniap": 123219,
|
3295 |
+
"▁pyniar": 126238,
|
3296 |
+
"▁pyniasoh": 123746,
|
3297 |
+
"▁pynieng": 123634,
|
3298 |
+
"▁pynim": 123528,
|
3299 |
+
"▁pynioh": 123618,
|
3300 |
+
"▁pynit": 126048,
|
3301 |
+
"▁pynithuh": 126415,
|
3302 |
+
"▁pynj": 122882,
|
3303 |
+
"▁pynjah": 124140,
|
3304 |
+
"▁pynjahburom": 125718,
|
3305 |
+
"▁pynjanai": 126501,
|
3306 |
+
"▁pynjari": 124470,
|
3307 |
+
"▁pynjem": 126386,
|
3308 |
+
"▁pynjlan": 124974,
|
3309 |
+
"▁pynjop": 125463,
|
3310 |
+
"▁pynjot": 123087,
|
3311 |
+
"▁pynjulor": 124642,
|
3312 |
+
"▁pynjur": 126611,
|
3313 |
+
"▁pynjynjar": 126640,
|
3314 |
+
"▁pynk": 122758,
|
3315 |
+
"▁pynkh": 122886,
|
3316 |
+
"▁pynkha": 125207,
|
3317 |
+
"▁pynkham": 126426,
|
3318 |
+
"▁pynkhein": 123719,
|
3319 |
+
"▁pynkheiñ": 124973,
|
3320 |
+
"▁pynkhiah": 124149,
|
3321 |
+
"▁pynkhie": 126588,
|
3322 |
+
"▁pynkhih": 123863,
|
3323 |
+
"▁pynkhlain": 123848,
|
3324 |
+
"▁pynkhlaiñ": 125997,
|
3325 |
+
"▁pynkhreh": 123104,
|
3326 |
+
"▁pynkhuid": 123459,
|
3327 |
+
"▁pynkhyllem": 124790,
|
3328 |
+
"▁pynkiew": 123874,
|
3329 |
+
"▁pynkit": 124176,
|
3330 |
+
"▁pynkloi": 125770,
|
3331 |
+
"▁pynksan": 125225,
|
3332 |
+
"▁pynkulmar": 126322,
|
3333 |
+
"▁pynkup": 123545,
|
3334 |
+
"▁pynkut": 123302,
|
3335 |
+
"▁pynkylla": 123251,
|
3336 |
+
"▁pynkyndit": 125888,
|
3337 |
+
"▁pynkynmaw": 123253,
|
3338 |
+
"▁pynkynriah": 123846,
|
3339 |
+
"▁pynkyntang": 124012,
|
3340 |
+
"▁pynl": 123078,
|
3341 |
+
"▁pynlait": 123530,
|
3342 |
+
"▁pynlang": 125595,
|
3343 |
+
"▁pynleit": 124553,
|
3344 |
+
"▁pynliem": 123580,
|
3345 |
+
"▁pynlip": 124765,
|
3346 |
+
"▁pynlong": 122747,
|
3347 |
+
"▁pynlut": 123558,
|
3348 |
+
"▁pynlwet": 125800,
|
3349 |
+
"▁pynlyngngoh": 126342,
|
3350 |
+
"▁pynm": 123461,
|
3351 |
+
"▁pynmih": 122907,
|
3352 |
+
"▁pynmihpat": 125324,
|
3353 |
+
"▁pynmong": 125645,
|
3354 |
+
"▁pynmynsaw": 124221,
|
3355 |
+
"▁pynneh": 123494,
|
3356 |
+
"▁pynngat": 124232,
|
3357 |
+
"▁pynp": 122768,
|
3358 |
+
"▁pynpait": 124829,
|
3359 |
+
"▁pynpaw": 122827,
|
3360 |
+
"▁pynph": 124777,
|
3361 |
+
"▁pynphai": 123691,
|
3362 |
+
"▁pynphriang": 126088,
|
3363 |
+
"▁pynpoi": 123710,
|
3364 |
+
"▁pynpra": 123824,
|
3365 |
+
"▁pynr": 123035,
|
3366 |
+
"▁pynrem": 123230,
|
3367 |
+
"▁pynrit": 126515,
|
3368 |
+
"▁pynroi": 123468,
|
3369 |
+
"▁pynrung": 123497,
|
3370 |
+
"▁pyns": 122804,
|
3371 |
+
"▁pynsah": 123567,
|
3372 |
+
"▁pynsakma": 125339,
|
3373 |
+
"▁pynsangeh": 123540,
|
3374 |
+
"▁pynsaphriang": 126278,
|
3375 |
+
"▁pynsh": 123553,
|
3376 |
+
"▁pynshah": 126216,
|
3377 |
+
"▁pynshai": 123279,
|
3378 |
+
"▁pynshisha": 123794,
|
3379 |
+
"▁pynshitom": 123068,
|
3380 |
+
"▁pynshlur": 123944,
|
3381 |
+
"▁pynshngain": 124084,
|
3382 |
+
"▁pynshong": 123815,
|
3383 |
+
"▁pynskhem": 123699,
|
3384 |
+
"▁pynsleh": 125696,
|
3385 |
+
"▁pynslem": 124639,
|
3386 |
+
"▁pynsngew": 123107,
|
3387 |
+
"▁pynsngewbha": 124342,
|
3388 |
+
"▁pynsngewsih": 126255,
|
3389 |
+
"▁pynsngewthuh": 124389,
|
3390 |
+
"▁pynsniew": 125452,
|
3391 |
+
"▁pynst": 124545,
|
3392 |
+
"▁pynsted": 125631,
|
3393 |
+
"▁pynsuk": 124409,
|
3394 |
+
"▁pynt": 123184,
|
3395 |
+
"▁pyntbit": 124999,
|
3396 |
+
"▁pyntha": 124778,
|
3397 |
+
"▁pynthi": 123313,
|
3398 |
+
"▁pynthikna": 123350,
|
3399 |
+
"▁pynthor": 125780,
|
3400 |
+
"▁pynthut": 124898,
|
3401 |
+
"▁pynthymmai": 124594,
|
3402 |
+
"▁pyntieng": 125529,
|
3403 |
+
"▁pyntikna": 125301,
|
3404 |
+
"▁pyntip": 123022,
|
3405 |
+
"▁pyntngen": 125832,
|
3406 |
+
"▁pyntngit": 125453,
|
3407 |
+
"▁pyntrei": 122949,
|
3408 |
+
"▁pyntreikam": 123074,
|
3409 |
+
"▁pyntriem": 124802,
|
3410 |
+
"▁pynurlong": 123952,
|
3411 |
+
"▁pynursla": 124772,
|
3412 |
+
"▁pynwan": 124732,
|
3413 |
+
"▁pynwandur": 125988,
|
3414 |
+
"▁pynwit": 126009,
|
3415 |
+
"▁pynï": 123080,
|
3416 |
+
"▁pynïaid": 124190,
|
3417 |
+
"▁pynïap": 123410,
|
3418 |
+
"▁pynïasoh": 126370,
|
3419 |
+
"▁pynïeng": 126364,
|
3420 |
+
"▁pynïoh": 126183,
|
3421 |
+
"▁pyrda": 124401,
|
3422 |
+
"▁pyrkhat": 122940,
|
3423 |
+
"▁pyrkhing": 124087,
|
3424 |
+
"▁pyrsa": 124966,
|
3425 |
+
"▁pyrshah": 122762,
|
3426 |
+
"▁pyrshang": 123061,
|
3427 |
+
"▁pyrta": 123549,
|
3428 |
+
"▁pyrthat": 125952,
|
3429 |
+
"▁pyrthei": 122809,
|
3430 |
+
"▁pyut": 125285,
|
3431 |
+
"▁radbah": 124998,
|
3432 |
+
"▁rahbor": 124240,
|
3433 |
+
"▁raij": 126100,
|
3434 |
+
"▁rangbah": 122748,
|
3435 |
+
"▁rangbahduh": 125063,
|
3436 |
+
"▁rangialehkai": 126540,
|
3437 |
+
"▁rangli": 126517,
|
3438 |
+
"▁rani": 126120,
|
3439 |
+
"▁rashon": 125558,
|
3440 |
+
"▁rawh": 124096,
|
3441 |
+
"▁rben": 126032,
|
3442 |
+
"▁rd": 126659,
|
3443 |
+
"▁rejistar": 126254,
|
3444 |
+
"▁riam": 123803,
|
3445 |
+
"▁riang": 123858,
|
3446 |
+
"▁riat": 125049,
|
3447 |
+
"▁ribhoi": 126010,
|
3448 |
+
"▁rieh": 123662,
|
3449 |
+
"▁riew": 122819,
|
3450 |
+
"▁riewkynthei": 125156,
|
3451 |
+
"▁riewlum": 123595,
|
3452 |
+
"▁riewngeit": 125163,
|
3453 |
+
"▁riewpaidbah": 125636,
|
3454 |
+
"▁riews": 124608,
|
3455 |
+
"▁riewspah": 123993,
|
3456 |
+
"▁riewstad": 125648,
|
3457 |
+
"▁rilum": 124957,
|
3458 |
+
"▁ringmraw": 124684,
|
3459 |
+
"▁rip": 125287,
|
3460 |
+
"▁ripod": 125701,
|
3461 |
+
"▁risa": 125511,
|
3462 |
+
"▁risain": 124924,
|
3463 |
+
"▁risaw": 124656,
|
3464 |
+
"▁rish": 124745,
|
3465 |
+
"▁rishot": 124756,
|
3466 |
+
"▁riti": 123357,
|
3467 |
+
"▁rkhi": 124125,
|
3468 |
+
"▁rkhie": 124949,
|
3469 |
+
"▁rm": 125035,
|
3470 |
+
"▁rmiang": 125725,
|
3471 |
+
"▁rnga": 125812,
|
3472 |
+
"▁rngai": 125320,
|
3473 |
+
"▁rngiew": 126207,
|
3474 |
+
"▁rnong": 124079,
|
3475 |
+
"▁ron": 124585,
|
3476 |
+
"▁roy": 124475,
|
3477 |
+
"▁rti": 126542,
|
3478 |
+
"▁rud": 124159,
|
3479 |
+
"▁rukom": 122858,
|
3480 |
+
"▁runar": 123300,
|
3481 |
+
"▁rupa": 123527,
|
3482 |
+
"▁rupang": 124356,
|
3483 |
+
"▁rwai": 123405,
|
3484 |
+
"▁rymbai": 125330,
|
3485 |
+
"▁rymphang": 124681,
|
3486 |
+
"▁ryndang": 124931,
|
3487 |
+
"▁ryng": 124578,
|
3488 |
+
"▁ryngk": 124435,
|
3489 |
+
"▁ryngkat": 122831,
|
3490 |
+
"▁ryngkew": 126437,
|
3491 |
+
"▁rynsan": 124144,
|
3492 |
+
"▁rynt": 123290,
|
3493 |
+
"▁ryntieh": 125340,
|
3494 |
+
"▁ryntih": 123430,
|
3495 |
+
"▁sabbaton": 125577,
|
3496 |
+
"▁sabha": 125803,
|
3497 |
+
"▁sacrific": 125745,
|
3498 |
+
"▁sahit": 124739,
|
3499 |
+
"▁sahkut": 124479,
|
3500 |
+
"▁sahlang": 123427,
|
3501 |
+
"▁sahnarphna": 126029,
|
3502 |
+
"▁sahteng": 124850,
|
3503 |
+
"▁sainar": 125322,
|
3504 |
+
"▁saindur": 125949,
|
3505 |
+
"▁saitjain": 124008,
|
3506 |
+
"▁saiñ": 123866,
|
3507 |
+
"▁sakma": 125903,
|
3508 |
+
"▁salia": 125076,
|
3509 |
+
"▁salonsar": 126356,
|
3510 |
+
"▁samaria": 125364,
|
3511 |
+
"▁samla": 122776,
|
3512 |
+
"▁samoi": 123596,
|
3513 |
+
"▁samuel": 124779,
|
3514 |
+
"▁sanbor": 126180,
|
3515 |
+
"▁sangeh": 123270,
|
3516 |
+
"▁sangma": 123208,
|
3517 |
+
"▁sani": 124768,
|
3518 |
+
"▁sanphew": 124210,
|
3519 |
+
"▁saph": 124568,
|
3520 |
+
"▁saphriang": 124757,
|
3521 |
+
"▁sarong": 124131,
|
3522 |
+
"▁satia": 122934,
|
3523 |
+
"▁saul": 123818,
|
3524 |
+
"▁sawa": 125073,
|
3525 |
+
"▁sawdong": 123185,
|
3526 |
+
"▁sawi": 124828,
|
3527 |
+
"▁sawphew": 124174,
|
3528 |
+
"▁sawspah": 126429,
|
3529 |
+
"▁sbai": 124534,
|
3530 |
+
"▁sboh": 124825,
|
3531 |
+
"▁sbun": 126112,
|
3532 |
+
"▁sdang": 122832,
|
3533 |
+
"▁sdien": 126017,
|
3534 |
+
"▁secretariat": 125829,
|
3535 |
+
"▁sein": 126173,
|
3536 |
+
"▁seisoh": 124843,
|
3537 |
+
"▁sel": 126646,
|
3538 |
+
"▁semifinal": 126599,
|
3539 |
+
"▁sengbhalang": 123232,
|
3540 |
+
"▁sengkmie": 125790,
|
3541 |
+
"▁sepngi": 123917,
|
3542 |
+
"▁shaba": 124181,
|
3543 |
+
"▁shabar": 123062,
|
3544 |
+
"▁shadem": 125137,
|
3545 |
+
"▁shadien": 124344,
|
3546 |
+
"▁shadkmen": 126598,
|
3547 |
+
"▁shadong": 125262,
|
3548 |
+
"▁shaduh": 123407,
|
3549 |
+
"▁shaei": 124563,
|
3550 |
+
"▁shahjop": 125939,
|
3551 |
+
"▁shahshitom": 125136,
|
3552 |
+
"▁shahshkor": 123352,
|
3553 |
+
"▁shaid": 124430,
|
3554 |
+
"▁shait": 124109,
|
3555 |
+
"▁shajrong": 124742,
|
3556 |
+
"▁shaka": 125146,
|
3557 |
+
"▁shakri": 122982,
|
3558 |
+
"▁shal": 124719,
|
3559 |
+
"▁shalan": 124307,
|
3560 |
+
"▁shalor": 126097,
|
3561 |
+
"▁shan": 125057,
|
3562 |
+
"▁shane": 124484,
|
3563 |
+
"▁shaneng": 124559,
|
3564 |
+
"▁shang": 123784,
|
3565 |
+
"▁shangpliang": 126444,
|
3566 |
+
"▁shaniah": 123349,
|
3567 |
+
"▁shano": 123742,
|
3568 |
+
"▁shap": 125792,
|
3569 |
+
"▁shaphang": 122852,
|
3570 |
+
"▁shaphrang": 124573,
|
3571 |
+
"▁shapoh": 123164,
|
3572 |
+
"▁sharak": 124354,
|
3573 |
+
"▁sharud": 125670,
|
3574 |
+
"▁sharum": 125362,
|
3575 |
+
"▁shata": 125273,
|
3576 |
+
"▁shatei": 123330,
|
3577 |
+
"▁shathie": 124072,
|
3578 |
+
"▁shatri": 126638,
|
3579 |
+
"▁shaw": 125369,
|
3580 |
+
"▁shawei": 125286,
|
3581 |
+
"▁shella": 125229,
|
3582 |
+
"▁shemphang": 124026,
|
3583 |
+
"▁sheptieng": 123371,
|
3584 |
+
"▁sher": 124428,
|
3585 |
+
"▁shiah": 124605,
|
3586 |
+
"▁shibnai": 124685,
|
3587 |
+
"▁shibun": 122950,
|
3588 |
+
"▁shibynta": 124532,
|
3589 |
+
"▁shihajar": 125473,
|
3590 |
+
"▁shik": 124951,
|
3591 |
+
"▁shikat": 124235,
|
3592 |
+
"▁shikatdei": 124426,
|
3593 |
+
"▁shikol": 126289,
|
3594 |
+
"▁shil": 124104,
|
3595 |
+
"▁shiliang": 123559,
|
3596 |
+
"▁shilliang": 125060,
|
3597 |
+
"▁shillong": 123003,
|
3598 |
+
"▁shimet": 123177,
|
3599 |
+
"▁shimkhia": 123521,
|
3600 |
+
"▁shimti": 123488,
|
3601 |
+
"▁shin": 124906,
|
3602 |
+
"▁shini": 125882,
|
3603 |
+
"▁shipai": 122917,
|
3604 |
+
"▁shipara": 126410,
|
3605 |
+
"▁shiphang": 125678,
|
3606 |
+
"▁shiphew": 123484,
|
3607 |
+
"▁shipor": 123586,
|
3608 |
+
"▁shisien": 123182,
|
3609 |
+
"▁shisnem": 123807,
|
3610 |
+
"▁shisngi": 124137,
|
3611 |
+
"▁shispah": 124025,
|
3612 |
+
"▁shisyndon": 124581,
|
3613 |
+
"▁shit": 124171,
|
3614 |
+
"▁shitaiew": 126387,
|
3615 |
+
"▁shiteng": 123142,
|
3616 |
+
"▁shithi": 123178,
|
3617 |
+
"▁shitom": 123048,
|
3618 |
+
"▁shitrhem": 125442,
|
3619 |
+
"▁shkor": 124203,
|
3620 |
+
"▁shla": 125152,
|
3621 |
+
"▁shlei": 125091,
|
3622 |
+
"▁shlem": 125849,
|
3623 |
+
"▁shlur": 124469,
|
3624 |
+
"▁shn": 125147,
|
3625 |
+
"▁shna": 122911,
|
3626 |
+
"▁shnat": 125205,
|
3627 |
+
"▁shng": 124014,
|
3628 |
+
"▁shngain": 124422,
|
3629 |
+
"▁shnong": 122721,
|
3630 |
+
"▁shon": 124219,
|
3631 |
+
"▁shong": 122744,
|
3632 |
+
"▁shongdor": 124876,
|
3633 |
+
"▁shongkha": 124045,
|
3634 |
+
"▁shongkurim": 123738,
|
3635 |
+
"▁shongs": 123703,
|
3636 |
+
"▁shongsain": 125417,
|
3637 |
+
"▁shongshit": 125539,
|
3638 |
+
"▁shongsuk": 124527,
|
3639 |
+
"▁shongthait": 124194,
|
3640 |
+
"▁shri": 125551,
|
3641 |
+
"▁shuki": 123380,
|
3642 |
+
"▁shukor": 123841,
|
3643 |
+
"▁shul": 126067,
|
3644 |
+
"▁shullai": 126150,
|
3645 |
+
"▁shun": 125739,
|
3646 |
+
"▁shuti": 124848,
|
3647 |
+
"▁shuwa": 122968,
|
3648 |
+
"▁shwa": 123017,
|
3649 |
+
"▁shy": 123234,
|
3650 |
+
"▁shyiap": 123401,
|
3651 |
+
"▁shyieng": 124370,
|
3652 |
+
"▁shylla": 124018,
|
3653 |
+
"▁shym": 122773,
|
3654 |
+
"▁shynrang": 122920,
|
3655 |
+
"▁shyntur": 124083,
|
3656 |
+
"▁shyrkhei": 123667,
|
3657 |
+
"▁shñiuh": 126443,
|
3658 |
+
"▁sian": 125484,
|
3659 |
+
"▁siang": 124308,
|
3660 |
+
"▁siat": 123416,
|
3661 |
+
"▁sieh": 125534,
|
3662 |
+
"▁siej": 125115,
|
3663 |
+
"▁sienjam": 124215,
|
3664 |
+
"▁siew": 122978,
|
3665 |
+
"▁sima": 126447,
|
3666 |
+
"▁singh": 124689,
|
3667 |
+
"▁skhem": 123341,
|
3668 |
+
"▁skhim": 123139,
|
3669 |
+
"▁skim": 124499,
|
3670 |
+
"▁skulbah": 125172,
|
3671 |
+
"▁skum": 125334,
|
3672 |
+
"▁sliang": 126618,
|
3673 |
+
"▁sloit": 126371,
|
3674 |
+
"▁sma": 125685,
|
3675 |
+
"▁smai": 123950,
|
3676 |
+
"▁snam": 123292,
|
3677 |
+
"▁snar": 126084,
|
3678 |
+
"▁snem": 122739,
|
3679 |
+
"▁sneng": 124683,
|
3680 |
+
"▁snep": 125602,
|
3681 |
+
"▁sngap": 123202,
|
3682 |
+
"▁sngew": 122740,
|
3683 |
+
"▁sngewbha": 122959,
|
3684 |
+
"▁sngewdei": 126400,
|
3685 |
+
"▁sngewhun": 126049,
|
3686 |
+
"▁sngewkhia": 124707,
|
3687 |
+
"▁sngewkmen": 125749,
|
3688 |
+
"▁sngewlem": 125125,
|
3689 |
+
"▁sngewlyngngoh": 126462,
|
3690 |
+
"▁sngewnguh": 125494,
|
3691 |
+
"▁sngewrit": 126662,
|
3692 |
+
"▁sngews": 123873,
|
3693 |
+
"▁sngewsarong": 125048,
|
3694 |
+
"▁sngewsih": 123307,
|
3695 |
+
"▁sngewsynei": 126422,
|
3696 |
+
"▁sngewt": 123912,
|
3697 |
+
"▁sngewthuh": 122991,
|
3698 |
+
"▁sngewtynnad": 124511,
|
3699 |
+
"▁sngewtynnat": 125756,
|
3700 |
+
"▁sngi": 122709,
|
3701 |
+
"▁sngur": 125972,
|
3702 |
+
"▁sniang": 124143,
|
3703 |
+
"▁snieh": 124022,
|
3704 |
+
"▁sniehdoh": 125575,
|
3705 |
+
"▁sniew": 123013,
|
3706 |
+
"▁snoh": 125043,
|
3707 |
+
"▁sobjek": 125464,
|
3708 |
+
"▁soc": 123657,
|
3709 |
+
"▁sohiong": 126317,
|
3710 |
+
"▁sohra": 123783,
|
3711 |
+
"▁sohwain": 124310,
|
3712 |
+
"▁sohwaiñ": 125064,
|
3713 |
+
"▁soitan": 124515,
|
3714 |
+
"▁solomon": 124106,
|
3715 |
+
"▁sons": 125651,
|
3716 |
+
"▁sopti": 125011,
|
3717 |
+
"▁sordar": 123948,
|
3718 |
+
"▁sorkar": 122714,
|
3719 |
+
"▁sos": 124935,
|
3720 |
+
"▁soskular": 125105,
|
3721 |
+
"▁sosp": 125606,
|
3722 |
+
"▁sospon": 125869,
|
3723 |
+
"▁sot": 125143,
|
3724 |
+
"▁spah": 123105,
|
3725 |
+
"▁ssa": 126341,
|
3726 |
+
"▁sta": 124792,
|
3727 |
+
"▁sted": 124661,
|
3728 |
+
"▁stet": 124544,
|
3729 |
+
"▁sti": 125326,
|
3730 |
+
"▁stieh": 125239,
|
3731 |
+
"▁sting": 124918,
|
3732 |
+
"▁sual": 125981,
|
3733 |
+
"▁suda": 124326,
|
3734 |
+
"▁suid": 125876,
|
3735 |
+
"▁suin": 124253,
|
3736 |
+
"▁suki": 124164,
|
3737 |
+
"▁suloi": 124862,
|
3738 |
+
"▁superintendent": 126249,
|
3739 |
+
"▁supreme": 125040,
|
3740 |
+
"▁sy": 123376,
|
3741 |
+
"▁syiar": 124211,
|
3742 |
+
"▁syiem": 122767,
|
3743 |
+
"▁syiemlieh": 126170,
|
3744 |
+
"▁syier": 126059,
|
3745 |
+
"▁syllok": 126204,
|
3746 |
+
"▁symbai": 123947,
|
3747 |
+
"▁symbud": 123990,
|
3748 |
+
"▁sympat": 125598,
|
3749 |
+
"▁synagog": 125926,
|
3750 |
+
"▁syndah": 124672,
|
3751 |
+
"▁syndon": 123364,
|
3752 |
+
"▁synduk": 124147,
|
3753 |
+
"▁syngk": 124699,
|
3754 |
+
"▁synjuk": 123766,
|
3755 |
+
"▁synn": 123725,
|
3756 |
+
"▁synniang": 123940,
|
3757 |
+
"▁synod": 126287,
|
3758 |
+
"▁synr": 123084,
|
3759 |
+
"▁synrai": 126614,
|
3760 |
+
"▁synran": 123281,
|
3761 |
+
"▁synreit": 125451,
|
3762 |
+
"▁synrop": 125526,
|
3763 |
+
"▁synsar": 126262,
|
3764 |
+
"▁synshar": 122879,
|
3765 |
+
"▁synt": 125497,
|
3766 |
+
"▁syntiew": 124005,
|
3767 |
+
"▁syria": 125256,
|
3768 |
+
"▁syriem": 126358,
|
3769 |
+
"▁syrngiew": 125264,
|
3770 |
+
"▁syrnod": 126316,
|
3771 |
+
"▁taiew": 123244,
|
3772 |
+
"▁takma": 124390,
|
3773 |
+
"▁taksi": 126382,
|
3774 |
+
"▁talasi": 125938,
|
3775 |
+
"▁tamasa": 123845,
|
3776 |
+
"▁tangba": 123865,
|
3777 |
+
"▁tawh": 124368,
|
3778 |
+
"▁taxi": 125881,
|
3779 |
+
"▁tbian": 126212,
|
3780 |
+
"▁tbit": 124057,
|
3781 |
+
"▁tdem": 125343,
|
3782 |
+
"▁tduh": 125440,
|
3783 |
+
"▁tehlakam": 124095,
|
3784 |
+
"▁templ": 123278,
|
3785 |
+
"▁thaba": 126446,
|
3786 |
+
"▁thad": 125202,
|
3787 |
+
"▁thah": 126202,
|
3788 |
+
"▁thain": 122815,
|
3789 |
+
"▁thait": 124387,
|
3790 |
+
"▁thaiñ": 123217,
|
3791 |
+
"▁thala": 124586,
|
3792 |
+
"▁thanad": 126639,
|
3793 |
+
"▁thap": 124373,
|
3794 |
+
"▁thapniang": 125471,
|
3795 |
+
"▁tharai": 124321,
|
3796 |
+
"▁thawain": 124750,
|
3797 |
+
"▁theih": 124284,
|
3798 |
+
"▁thiah": 123362,
|
3799 |
+
"▁thiang": 124945,
|
3800 |
+
"▁thiat": 125582,
|
3801 |
+
"▁thiaw": 124379,
|
3802 |
+
"▁thied": 123071,
|
3803 |
+
"▁thikna": 123675,
|
3804 |
+
"▁thir": 126615,
|
3805 |
+
"▁thl": 123324,
|
3806 |
+
"▁thla": 125984,
|
3807 |
+
"▁thlah": 125755,
|
3808 |
+
"▁thlen": 125563,
|
3809 |
+
"▁thleng": 125039,
|
3810 |
+
"▁thliew": 123775,
|
3811 |
+
"▁thma": 123092,
|
3812 |
+
"▁thmu": 123090,
|
3813 |
+
"▁thngan": 123898,
|
3814 |
+
"▁thoh": 122889,
|
3815 |
+
"▁thohdieng": 124785,
|
3816 |
+
"▁thok": 123792,
|
3817 |
+
"▁thom": 126649,
|
3818 |
+
"▁thombor": 123730,
|
3819 |
+
"▁thrang": 125055,
|
3820 |
+
"▁thuh": 124751,
|
3821 |
+
"▁thung": 122990,
|
3822 |
+
"▁thungkam": 124653,
|
3823 |
+
"▁thur": 126143,
|
3824 |
+
"▁thwet": 124860,
|
3825 |
+
"▁thyll": 124282,
|
3826 |
+
"▁thylli": 124416,
|
3827 |
+
"▁thylliej": 124328,
|
3828 |
+
"▁thymmai": 122895,
|
3829 |
+
"▁thymmei": 124646,
|
3830 |
+
"▁tiar": 123049,
|
3831 |
+
"▁tieng": 124015,
|
3832 |
+
"▁tiew": 126035,
|
3833 |
+
"▁tiket": 123600,
|
3834 |
+
"▁tikna": 125634,
|
3835 |
+
"▁titos": 126477,
|
3836 |
+
"▁tlang": 124305,
|
3837 |
+
"▁tlot": 123989,
|
3838 |
+
"▁tnad": 122821,
|
3839 |
+
"▁tnat": 123053,
|
3840 |
+
"▁tnga": 123131,
|
3841 |
+
"▁tnum": 124609,
|
3842 |
+
"▁tohkit": 123438,
|
3843 |
+
"▁trei": 122770,
|
3844 |
+
"▁treikam": 122984,
|
3845 |
+
"▁treilang": 124637,
|
3846 |
+
"▁trep": 125461,
|
3847 |
+
"▁tribe": 125883,
|
3848 |
+
"▁tuid": 123732,
|
3849 |
+
"▁tuklar": 123966,
|
3850 |
+
"▁tulop": 123432,
|
3851 |
+
"▁tura": 124492,
|
3852 |
+
"▁turoi": 124859,
|
3853 |
+
"▁twa": 125928,
|
3854 |
+
"▁twad": 126345,
|
3855 |
+
"▁tyll": 125316,
|
3856 |
+
"▁tyllai": 125240,
|
3857 |
+
"▁tyllep": 125845,
|
3858 |
+
"▁tylli": 122752,
|
3859 |
+
"▁tyllong": 123745,
|
3860 |
+
"▁tymmen": 123088,
|
3861 |
+
"▁tymp": 126455,
|
3862 |
+
"▁tyng": 122845,
|
3863 |
+
"▁tyngeh": 123285,
|
3864 |
+
"▁tyngk": 124870,
|
3865 |
+
"▁tyngka": 122944,
|
3866 |
+
"▁tyngkai": 126213,
|
3867 |
+
"▁tyngkhuh": 125891,
|
3868 |
+
"▁tyngshain": 126186,
|
3869 |
+
"▁tyngshop": 126296,
|
3870 |
+
"▁tynjuh": 124982,
|
3871 |
+
"▁tynnad": 125819,
|
3872 |
+
"▁tynr": 123153,
|
3873 |
+
"▁tynrah": 125313,
|
3874 |
+
"▁tynrai": 123263,
|
3875 |
+
"▁tynsong": 124629,
|
3876 |
+
"▁tyr": 123810,
|
3877 |
+
"▁tyrkhong": 124782,
|
3878 |
+
"▁tyrp": 126237,
|
3879 |
+
"▁tyrpeng": 126378,
|
3880 |
+
"▁tyrwa": 123875,
|
3881 |
+
"▁tû": 123546,
|
3882 |
+
"▁tûr": 123969,
|
3883 |
+
"▁tûrin": 125573,
|
3884 |
+
"▁udei": 125772,
|
3885 |
+
"▁udp": 123366,
|
3886 |
+
"▁uei": 124160,
|
3887 |
+
"▁ujor": 123083,
|
3888 |
+
"▁umbam": 125525,
|
3889 |
+
"▁umdih": 125109,
|
3890 |
+
"▁umiam": 125930,
|
3891 |
+
"▁ummat": 125037,
|
3892 |
+
"▁umphniang": 123671,
|
3893 |
+
"▁umpohliew": 125713,
|
3894 |
+
"▁umroi": 125904,
|
3895 |
+
"▁umsning": 124884,
|
3896 |
+
"▁united": 125254,
|
3897 |
+
"▁upper": 125001,
|
3898 |
+
"▁uranium": 124619,
|
3899 |
+
"▁urlong": 124027,
|
3900 |
+
"▁utei": 122972,
|
3901 |
+
"▁uto": 126187,
|
3902 |
+
"▁uwei": 122769,
|
3903 |
+
"▁vek": 125612,
|
3904 |
+
"▁wahduid": 125079,
|
3905 |
+
"▁waheh": 125131,
|
3906 |
+
"▁wahlang": 125600,
|
3907 |
+
"▁wain": 124234,
|
3908 |
+
"▁waitlam": 124036,
|
3909 |
+
"▁waiñ": 125006,
|
3910 |
+
"▁wallam": 123122,
|
3911 |
+
"▁wanphai": 124886,
|
3912 |
+
"▁wanrah": 122838,
|
3913 |
+
"▁warjri": 125214,
|
3914 |
+
"▁watla": 123554,
|
3915 |
+
"▁wer": 123315,
|
3916 |
+
"▁wine": 126284,
|
3917 |
+
"▁wit": 126209,
|
3918 |
+
"▁woh": 126607,
|
3919 |
+
"▁worship": 126062,
|
3920 |
+
"▁wut": 124161,
|
3921 |
+
"▁ynda": 123190,
|
3922 |
+
"▁yo": 122807,
|
3923 |
+
"▁zawk": 126407,
|
3924 |
+
"▁zawng": 123551,
|
3925 |
+
"▁zâw": 125561,
|
3926 |
+
"▁Ïa": 123535,
|
3927 |
+
"▁Ïing": 125251,
|
3928 |
+
"▁êm": 126286,
|
3929 |
+
"▁ïa": 122707,
|
3930 |
+
"▁ïabit": 125171,
|
3931 |
+
"▁ïada": 124497,
|
3932 |
+
"▁ïadei": 123996,
|
3933 |
+
"▁ïadon": 126089,
|
3934 |
+
"▁ïai": 123572,
|
3935 |
+
"▁ïaid": 123498,
|
3936 |
+
"▁ïaineh": 126545,
|
3937 |
+
"▁ïak": 123418,
|
3938 |
+
"▁ïaka": 123289,
|
3939 |
+
"▁ïakhun": 124690,
|
3940 |
+
"▁ïaki": 123712,
|
3941 |
+
"▁ïakren": 124538,
|
3942 |
+
"▁ïakynduh": 124762,
|
3943 |
+
"▁ïalade": 126125,
|
3944 |
+
"▁ïalam": 123630,
|
3945 |
+
"▁ïalap": 126505,
|
3946 |
+
"▁ïaleh": 123573,
|
3947 |
+
"▁ïalehkai": 123903,
|
3948 |
+
"▁ïam": 124799,
|
3949 |
+
"▁ïano": 124483,
|
3950 |
+
"▁ïap": 123257,
|
3951 |
+
"▁ïar": 126496,
|
3952 |
+
"▁ïarap": 123961,
|
3953 |
+
"▁ïaroh": 124552,
|
3954 |
+
"▁ïas": 124334,
|
3955 |
+
"▁ïashim": 125509,
|
3956 |
+
"▁ïat": 124481,
|
3957 |
+
"▁ïathuh": 123297,
|
3958 |
+
"▁ïeng": 123720,
|
3959 |
+
"▁ïew": 125422,
|
3960 |
+
"▁ïing": 122983,
|
3961 |
+
"▁ïingjaiñ": 125385,
|
3962 |
+
"▁ïingsyiem": 126231,
|
3963 |
+
"▁ïoh": 122871,
|
3964 |
+
"▁ïohi": 123280,
|
3965 |
+
"▁ïohpdiang": 125941,
|
3966 |
+
"▁ïohsngew": 123822,
|
3967 |
+
"▁ñ": 126568,
|
3968 |
+
"▁ñi": 124546,
|
3969 |
+
"▁ñiew": 124231,
|
3970 |
+
"▁ñiut": 124731,
|
3971 |
+
"▁̃": 122746,
|
3972 |
+
"▁͂": 125506,
|
3973 |
+
"▁і": 123510,
|
3974 |
+
"▁ṭha": 126327,
|
3975 |
+
"▁ṭhîn": 125544,
|
3976 |
+
"▁‟": 125889,
|
3977 |
+
"▁′": 124887,
|
3978 |
+
"⚔": 126677,
|
3979 |
+
"✝": 126679,
|
3980 |
+
"䬠": 126683,
|
3981 |
+
"📷": 126684,
|
3982 |
+
"🛐": 126685
|
3983 |
+
}
|
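The entries above close out added_tokens.json, which maps each added SentencePiece surface form (the "▁" prefix marks a word-initial piece) to its integer id in the encoder vocabulary. As a quick sanity check, the file can be read with the standard json module and probed for ids that appear in this diff; this is a minimal sketch, assuming added_tokens.json from this upload has been downloaded into the working directory.

```python
import json

# Minimal sketch: inspect the added-token mapping shown in this diff.
# Assumes added_tokens.json from this repository is in the working directory.
with open("added_tokens.json", encoding="utf-8") as f:
    added_tokens = json.load(f)

# "▁shillong" -> 123003 and "▁sorkar" -> 122714 per the entries above.
print(added_tokens["▁shillong"], added_tokens["▁sorkar"])

# Invert the mapping to look tokens up by id.
id_to_token = {idx: tok for tok, idx in added_tokens.items()}
print(id_to_token[126685])  # final entry of the file, "🛐"
```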
config.json
ADDED
@@ -0,0 +1,46 @@
|
1 |
+
{
|
2 |
+
"_name_or_path": "indictrans-ne-en-checkpoint-1B/checkpoint-4000",
|
3 |
+
"activation_dropout": 0.0,
|
4 |
+
"activation_function": "gelu",
|
5 |
+
"architectures": [
|
6 |
+
"IndicTransForConditionalGeneration"
|
7 |
+
],
|
8 |
+
"attention_dropout": 0.0,
|
9 |
+
"attn_implementation": null,
|
10 |
+
"auto_map": {
|
11 |
+
"AutoConfig": "configuration_indictrans.IndicTransConfig",
|
12 |
+
"AutoModelForSeq2SeqLM": "modeling_indictrans.IndicTransForConditionalGeneration"
|
13 |
+
},
|
14 |
+
"bos_token_id": 0,
|
15 |
+
"decoder_attention_heads": 16,
|
16 |
+
"decoder_embed_dim": 1024,
|
17 |
+
"decoder_ffn_dim": 8192,
|
18 |
+
"decoder_layerdrop": 0,
|
19 |
+
"decoder_layers": 18,
|
20 |
+
"decoder_normalize_before": true,
|
21 |
+
"decoder_start_token_id": 2,
|
22 |
+
"decoder_vocab_size": 32296,
|
23 |
+
"dropout": 0.2,
|
24 |
+
"encoder_attention_heads": 16,
|
25 |
+
"encoder_embed_dim": 1024,
|
26 |
+
"encoder_ffn_dim": 8192,
|
27 |
+
"encoder_layerdrop": 0,
|
28 |
+
"encoder_layers": 18,
|
29 |
+
"encoder_normalize_before": true,
|
30 |
+
"encoder_vocab_size": 126687,
|
31 |
+
"eos_token_id": 2,
|
32 |
+
"init_std": 0.02,
|
33 |
+
"is_encoder_decoder": true,
|
34 |
+
"layernorm_embedding": false,
|
35 |
+
"max_source_positions": 256,
|
36 |
+
"max_target_positions": 256,
|
37 |
+
"model_type": "IndicTrans",
|
38 |
+
"num_hidden_layers": 18,
|
39 |
+
"pad_token_id": 1,
|
40 |
+
"scale_embedding": true,
|
41 |
+
"share_decoder_input_output_embed": false,
|
42 |
+
"tokenizer_class": "IndicTransTokenizer",
|
43 |
+
"torch_dtype": "bfloat16",
|
44 |
+
"transformers_version": "4.44.2",
|
45 |
+
"use_cache": true
|
46 |
+
}
|
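config.json registers the custom classes through auto_map, so the checkpoint is meant to be loaded with trust_remote_code=True. The snippet below is only a loading sketch: it assumes these files sit in a local folder (the "." path is a placeholder) and that the accompanying tokenizer_config.json exposes the custom IndicTransTokenizer through AutoTokenizer in the usual way; neither is guaranteed by this diff alone.

```python
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# "." is a placeholder for the folder containing this checkpoint (an assumption).
model = AutoModelForSeq2SeqLM.from_pretrained(
    ".",
    trust_remote_code=True,      # required because of the auto_map entries above
    torch_dtype=torch.bfloat16,  # matches "torch_dtype": "bfloat16" in config.json
)
# Assumes tokenizer_config.json wires up IndicTransTokenizer for AutoTokenizer.
tokenizer = AutoTokenizer.from_pretrained(".", trust_remote_code=True)
```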
configuration_indictrans.py
ADDED
@@ -0,0 +1,309 @@
|
1 |
+
# coding=utf-8
|
2 |
+
# Copyright 2023 The IndicTrans2 Authors and AI4Bharat team. All rights reserved.
|
3 |
+
#
|
4 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5 |
+
# you may not use this file except in compliance with the License.
|
6 |
+
# You may obtain a copy of the License at
|
7 |
+
#
|
8 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9 |
+
#
|
10 |
+
# Unless required by applicable law or agreed to in writing, software
|
11 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13 |
+
# See the License for the specific language governing permissions and
|
14 |
+
# limitations under the License.
|
15 |
+
""" PyTorch IndicTrans config."""
|
16 |
+
|
17 |
+
|
18 |
+
from collections import OrderedDict
|
19 |
+
from typing import Any, Mapping, Optional
|
20 |
+
|
21 |
+
from transformers import PreTrainedTokenizer
|
22 |
+
from transformers.configuration_utils import PretrainedConfig
|
23 |
+
from transformers.onnx import OnnxConfig, OnnxSeq2SeqConfigWithPast
|
24 |
+
from transformers.onnx.utils import compute_effective_axis_dimension
|
25 |
+
from transformers.utils import TensorType, is_torch_available
|
26 |
+
|
27 |
+
|
28 |
+
# Copied from transformers.models.m2m_100.configuration_m2m_100.M2M100Config->IndicTrans
|
29 |
+
class IndicTransConfig(PretrainedConfig):
|
30 |
+
r"""
|
31 |
+
This is the configuration class to store the configuration of a [`IT2Model`]. It is used to instantiate an
|
32 |
+
IT2 model according to the specified arguments, defining the model architecture. Instantiating a configuration
|
33 |
+
with the defaults will yield a configuration similar to that of the IT2 model.
|
34 |
+
|
35 |
+
Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
|
36 |
+
documentation from [`PretrainedConfig`] for more information.
|
37 |
+
|
38 |
+
|
39 |
+
Args:
|
40 |
+
vocab_size (`int`, *optional*, defaults to 50265):
|
41 |
+
Vocabulary size of the IT2 model. Defines the number of different tokens that can be represented by the
|
42 |
+
`input_ids` passed when calling [`IT2Model`].
|
43 |
+
d_model (`int`, *optional*, defaults to 1024):
|
44 |
+
Dimensionality of the layers and the pooler layer.
|
45 |
+
encoder_layers (`int`, *optional*, defaults to 12):
|
46 |
+
Number of encoder layers.
|
47 |
+
decoder_layers (`int`, *optional*, defaults to 12):
|
48 |
+
Number of decoder layers.
|
49 |
+
encoder_attention_heads (`int`, *optional*, defaults to 16):
|
50 |
+
Number of attention heads for each attention layer in the Transformer encoder.
|
51 |
+
decoder_attention_heads (`int`, *optional*, defaults to 16):
|
52 |
+
Number of attention heads for each attention layer in the Transformer decoder.
|
53 |
+
decoder_ffn_dim (`int`, *optional*, defaults to 4096):
|
54 |
+
Dimensionality of the "intermediate" (often named feed-forward) layer in decoder.
|
55 |
+
encoder_ffn_dim (`int`, *optional*, defaults to 4096):
|
56 |
+
Dimensionality of the "intermediate" (often named feed-forward) layer in encoder.
|
57 |
+
activation_function (`str` or `function`, *optional*, defaults to `"gelu"`):
|
58 |
+
The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`,
|
59 |
+
`"relu"`, `"silu"` and `"gelu_new"` are supported.
|
60 |
+
dropout (`float`, *optional*, defaults to 0.1):
|
61 |
+
The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
|
62 |
+
attention_dropout (`float`, *optional*, defaults to 0.0):
|
63 |
+
The dropout ratio for the attention probabilities.
|
64 |
+
activation_dropout (`float`, *optional*, defaults to 0.0):
|
65 |
+
The dropout ratio for activations inside the fully connected layer.
|
66 |
+
classifier_dropout (`float`, *optional*, defaults to 0.0):
|
67 |
+
The dropout ratio for classifier.
|
68 |
+
max_position_embeddings (`int`, *optional*, defaults to 1024):
|
69 |
+
The maximum sequence length that this model might ever be used with. Typically set this to something large
|
70 |
+
just in case (e.g., 512 or 1024 or 2048).
|
71 |
+
init_std (`float`, *optional*, defaults to 0.02):
|
72 |
+
The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
|
73 |
+
encoder_layerdrop (`float`, *optional*, defaults to 0.0):
|
74 |
+
The LayerDrop probability for the encoder. See the [LayerDrop paper](https://arxiv.org/abs/1909.11556)
|
75 |
+
for more details.
|
76 |
+
decoder_layerdrop (`float`, *optional*, defaults to 0.0):
|
77 |
+
The LayerDrop probability for the decoder. See the [LayerDrop paper](https://arxiv.org/abs/1909.11556)
|
78 |
+
for more details.
|
79 |
+
use_cache (`bool`, *optional*, defaults to `True`):
|
80 |
+
Whether or not the model should return the last key/values attentions (not used by all models).
|
81 |
+
"""
|
82 |
+
model_type = "IndicTrans"
|
83 |
+
keys_to_ignore_at_inference = ["past_key_values"]
|
84 |
+
attribute_map = {
|
85 |
+
"num_attention_heads": "encoder_attention_heads",
|
86 |
+
"hidden_size": "d_model",
|
87 |
+
}
|
88 |
+
|
89 |
+
def __init__(
|
90 |
+
self,
|
91 |
+
encoder_vocab_size=None,
|
92 |
+
decoder_vocab_size=None,
|
93 |
+
encoder_embed_dim=512,
|
94 |
+
decoder_embed_dim=512,
|
95 |
+
max_source_positions=210,
|
96 |
+
max_target_positions=210,
|
97 |
+
encoder_layers=6,
|
98 |
+
encoder_ffn_dim=2048,
|
99 |
+
encoder_attention_heads=8,
|
100 |
+
decoder_layers=6,
|
101 |
+
decoder_ffn_dim=2048,
|
102 |
+
decoder_attention_heads=8,
|
103 |
+
encoder_layerdrop=0.00,
|
104 |
+
decoder_layerdrop=0.00,
|
105 |
+
use_cache=True,
|
106 |
+
is_encoder_decoder=True,
|
107 |
+
activation_function="relu",
|
108 |
+
encoder_normalize_before=False,
|
109 |
+
decoder_normalize_before=False,
|
110 |
+
layernorm_embedding=False,
|
111 |
+
share_decoder_input_output_embed=False,
|
112 |
+
dropout=0.1,
|
113 |
+
attention_dropout=0.0,
|
114 |
+
activation_dropout=0.0,
|
115 |
+
init_std=0.02,
|
116 |
+
scale_embedding=True,
|
117 |
+
decoder_start_token_id=2,
|
118 |
+
pad_token_id=1,
|
119 |
+
bos_token_id=0,
|
120 |
+
eos_token_id=2,
|
121 |
+
attn_implementation="eager",
|
122 |
+
**kwargs,
|
123 |
+
):
|
124 |
+
self.encoder_vocab_size = encoder_vocab_size
|
125 |
+
self.decoder_vocab_size = decoder_vocab_size
|
126 |
+
self.encoder_normalize_before = encoder_normalize_before
|
127 |
+
self.decoder_normalize_before = decoder_normalize_before
|
128 |
+
self.layernorm_embedding = layernorm_embedding
|
129 |
+
self.max_source_positions = max_source_positions
|
130 |
+
self.max_target_positions = max_target_positions
|
131 |
+
self.encoder_embed_dim = encoder_embed_dim
|
132 |
+
self.decoder_embed_dim = decoder_embed_dim
|
133 |
+
self.encoder_ffn_dim = encoder_ffn_dim
|
134 |
+
self.encoder_layers = encoder_layers
|
135 |
+
self.encoder_attention_heads = encoder_attention_heads
|
136 |
+
self.decoder_ffn_dim = decoder_ffn_dim
|
137 |
+
self.decoder_layers = decoder_layers
|
138 |
+
self.decoder_attention_heads = decoder_attention_heads
|
139 |
+
self.dropout = dropout
|
140 |
+
self.attention_dropout = attention_dropout
|
141 |
+
self.activation_dropout = activation_dropout
|
142 |
+
self.activation_function = activation_function
|
143 |
+
self.init_std = init_std
|
144 |
+
self.encoder_layerdrop = encoder_layerdrop
|
145 |
+
self.decoder_layerdrop = decoder_layerdrop
|
146 |
+
self.use_cache = use_cache
|
147 |
+
self.num_hidden_layers = encoder_layers
|
148 |
+
self.scale_embedding = scale_embedding
|
149 |
+
self.share_decoder_input_output_embed = share_decoder_input_output_embed
|
150 |
+
self.attn_implementation = attn_implementation
|
151 |
+
|
152 |
+
super().__init__(
|
153 |
+
pad_token_id=pad_token_id,
|
154 |
+
bos_token_id=bos_token_id,
|
155 |
+
eos_token_id=eos_token_id,
|
156 |
+
is_encoder_decoder=is_encoder_decoder,
|
157 |
+
decoder_start_token_id=decoder_start_token_id,
|
158 |
+
**kwargs,
|
159 |
+
)
|
160 |
+
|
161 |
+
|
162 |
+
class IndicTransOnnxConfig(OnnxSeq2SeqConfigWithPast):
|
163 |
+
@property
|
164 |
+
def inputs(self) -> Mapping[str, Mapping[int, str]]:
|
165 |
+
common_inputs = OrderedDict(
|
166 |
+
[
|
167 |
+
("input_ids", {0: "batch", 1: "encoder_sequence"}),
|
168 |
+
("attention_mask", {0: "batch", 1: "encoder_sequence"}),
|
169 |
+
]
|
170 |
+
)
|
171 |
+
|
172 |
+
if self.use_past:
|
173 |
+
common_inputs["decoder_input_ids"] = {0: "batch"}
|
174 |
+
common_inputs["decoder_attention_mask"] = {
|
175 |
+
0: "batch",
|
176 |
+
1: "past_decoder_sequence + sequence",
|
177 |
+
}
|
178 |
+
else:
|
179 |
+
common_inputs["decoder_input_ids"] = {0: "batch", 1: "decoder_sequence"}
|
180 |
+
common_inputs["decoder_attention_mask"] = {
|
181 |
+
0: "batch",
|
182 |
+
1: "decoder_sequence",
|
183 |
+
}
|
184 |
+
|
185 |
+
if self.use_past:
|
186 |
+
self.fill_with_past_key_values_(common_inputs, direction="inputs")
|
187 |
+
return common_inputs
|
188 |
+
|
189 |
+
# Copied from BartOnnxConfig._generate_dummy_inputs_for_sequence_classification_and_question_answering
|
190 |
+
# A better name would be _generate_dummy_inputs_for_encoder_and_decoder because sequence classification and question
|
191 |
+
# answering are not supported for IT2, but this name is preserved to be able to check that the copy matches what
|
192 |
+
# was done for BART so that it can be updated if need be.
|
193 |
+
def _generate_dummy_inputs_for_sequence_classification_and_question_answering(
|
194 |
+
self,
|
195 |
+
tokenizer: PreTrainedTokenizer,
|
196 |
+
batch_size: int = -1,
|
197 |
+
seq_length: int = -1,
|
198 |
+
is_pair: bool = False,
|
199 |
+
framework: Optional[TensorType] = None,
|
200 |
+
) -> Mapping[str, Any]:
|
201 |
+
# Copied from OnnxConfig.generate_dummy_inputs
|
202 |
+
# Did not use super(OnnxConfigWithPast, self).generate_dummy_inputs for code clarity.
|
203 |
+
# If dynamic axis (-1) we forward with a fixed dimension of 2 samples to avoid optimizations made by ONNX
|
204 |
+
batch_size = compute_effective_axis_dimension(
|
205 |
+
batch_size,
|
206 |
+
fixed_dimension=OnnxConfig.default_fixed_batch,
|
207 |
+
num_token_to_add=0,
|
208 |
+
)
|
209 |
+
|
210 |
+
# If dynamic axis (-1) we forward with a fixed dimension of 8 tokens to avoid optimizations made by ONNX
|
211 |
+
token_to_add = tokenizer.num_special_tokens_to_add(is_pair)
|
212 |
+
seq_length = compute_effective_axis_dimension(
|
213 |
+
seq_length,
|
214 |
+
fixed_dimension=OnnxConfig.default_fixed_sequence,
|
215 |
+
num_token_to_add=token_to_add,
|
216 |
+
)
|
217 |
+
|
218 |
+
# Generate dummy inputs according to compute batch and sequence
|
219 |
+
dummy_input = [" ".join([tokenizer.unk_token]) * seq_length] * batch_size
|
220 |
+
common_inputs = dict(tokenizer(dummy_input, return_tensors=framework))
|
221 |
+
return common_inputs
|
222 |
+
|
223 |
+
# Copied from transformers.models.bart.configuration_bart.BartOnnxConfig._generate_dummy_inputs_for_default_and_seq2seq_lm
|
224 |
+
def _generate_dummy_inputs_for_default_and_seq2seq_lm(
|
225 |
+
self,
|
226 |
+
tokenizer: PreTrainedTokenizer,
|
227 |
+
batch_size: int = -1,
|
228 |
+
seq_length: int = -1,
|
229 |
+
is_pair: bool = False,
|
230 |
+
framework: Optional[TensorType] = None,
|
231 |
+
) -> Mapping[str, Any]:
|
232 |
+
encoder_inputs = self._generate_dummy_inputs_for_sequence_classification_and_question_answering(
|
233 |
+
tokenizer, batch_size, seq_length, is_pair, framework
|
234 |
+
)
|
235 |
+
|
236 |
+
# Generate decoder inputs
|
237 |
+
decoder_seq_length = seq_length if not self.use_past else 1
|
238 |
+
decoder_inputs = self._generate_dummy_inputs_for_sequence_classification_and_question_answering(
|
239 |
+
tokenizer, batch_size, decoder_seq_length, is_pair, framework
|
240 |
+
)
|
241 |
+
decoder_inputs = {
|
242 |
+
f"decoder_{name}": tensor for name, tensor in decoder_inputs.items()
|
243 |
+
}
|
244 |
+
common_inputs = dict(**encoder_inputs, **decoder_inputs)
|
245 |
+
|
246 |
+
if self.use_past:
|
247 |
+
if not is_torch_available():
|
248 |
+
raise ValueError(
|
249 |
+
"Cannot generate dummy past_keys inputs without PyTorch installed."
|
250 |
+
)
|
251 |
+
else:
|
252 |
+
import torch
|
253 |
+
batch, encoder_seq_length = common_inputs["input_ids"].shape
|
254 |
+
decoder_seq_length = common_inputs["decoder_input_ids"].shape[1]
|
255 |
+
(
|
256 |
+
num_encoder_attention_heads,
|
257 |
+
num_decoder_attention_heads,
|
258 |
+
) = self.num_attention_heads
|
259 |
+
encoder_shape = (
|
260 |
+
batch,
|
261 |
+
num_encoder_attention_heads,
|
262 |
+
encoder_seq_length,
|
263 |
+
self._config.hidden_size // num_encoder_attention_heads,
|
264 |
+
)
|
265 |
+
decoder_past_length = decoder_seq_length + 3
|
266 |
+
decoder_shape = (
|
267 |
+
batch,
|
268 |
+
num_decoder_attention_heads,
|
269 |
+
decoder_past_length,
|
270 |
+
self._config.hidden_size // num_decoder_attention_heads,
|
271 |
+
)
|
272 |
+
|
273 |
+
common_inputs["decoder_attention_mask"] = torch.cat(
|
274 |
+
[
|
275 |
+
common_inputs["decoder_attention_mask"],
|
276 |
+
torch.ones(batch, decoder_past_length),
|
277 |
+
],
|
278 |
+
dim=1,
|
279 |
+
)
|
280 |
+
|
281 |
+
common_inputs["past_key_values"] = []
|
282 |
+
# If the number of encoder and decoder layers are present in the model configuration, both are considered
|
283 |
+
num_encoder_layers, num_decoder_layers = self.num_layers
|
284 |
+
min_num_layers = min(num_encoder_layers, num_decoder_layers)
|
285 |
+
max_num_layers = (
|
286 |
+
max(num_encoder_layers, num_decoder_layers) - min_num_layers
|
287 |
+
)
|
288 |
+
remaining_side_name = (
|
289 |
+
"encoder" if num_encoder_layers > num_decoder_layers else "decoder"
|
290 |
+
)
|
291 |
+
|
292 |
+
for _ in range(min_num_layers):
|
293 |
+
common_inputs["past_key_values"].append(
|
294 |
+
(
|
295 |
+
torch.zeros(decoder_shape),
|
296 |
+
torch.zeros(decoder_shape),
|
297 |
+
torch.zeros(encoder_shape),
|
298 |
+
torch.zeros(encoder_shape),
|
299 |
+
)
|
300 |
+
)
|
301 |
+
# TODO: test this.
|
302 |
+
shape = encoder_shape if remaining_side_name == "encoder" else decoder_shape
|
303 |
+
for _ in range(min_num_layers, max_num_layers):
|
304 |
+
common_inputs["past_key_values"].append(
|
305 |
+
(torch.zeros(shape), torch.zeros(shape))
|
306 |
+
)
|
307 |
+
return common_inputs
|
308 |
+
|
309 |
+
generate_dummy_inputs = _generate_dummy_inputs_for_default_and_seq2seq_lm
|
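For reference, the values stored in config.json above can be reproduced by instantiating IndicTransConfig directly. The sketch below is illustrative only: it assumes configuration_indictrans.py is importable from the current directory and passes just the parameters that differ from the defaults shown in __init__.

```python
# Illustrative only: build an IndicTransConfig matching config.json above.
from configuration_indictrans import IndicTransConfig

config = IndicTransConfig(
    encoder_vocab_size=126687,
    decoder_vocab_size=32296,
    encoder_embed_dim=1024,
    decoder_embed_dim=1024,
    encoder_layers=18,
    decoder_layers=18,
    encoder_ffn_dim=8192,
    decoder_ffn_dim=8192,
    encoder_attention_heads=16,
    decoder_attention_heads=16,
    max_source_positions=256,
    max_target_positions=256,
    encoder_normalize_before=True,
    decoder_normalize_before=True,
    activation_function="gelu",
    dropout=0.2,
)
print(config.num_hidden_layers)  # 18, mirrored from encoder_layers in __init__
```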
dict.SRC.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
dict.TGT.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
generation_config.json
ADDED
@@ -0,0 +1,7 @@
|
1 |
+
{
|
2 |
+
"bos_token_id": 0,
|
3 |
+
"decoder_start_token_id": 2,
|
4 |
+
"eos_token_id": 2,
|
5 |
+
"pad_token_id": 1,
|
6 |
+
"transformers_version": "4.44.2"
|
7 |
+
}
|
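generation_config.json only pins the special-token ids used at decode time. The snippet below is an assumption-laden illustration (it assumes the file sits in the current directory alongside the rest of the checkpoint) showing how those values surface through transformers' GenerationConfig.

```python
from transformers import GenerationConfig

# "." is a placeholder for the checkpoint folder containing generation_config.json.
gen_config = GenerationConfig.from_pretrained(".")
print(gen_config.decoder_start_token_id)  # 2
print(gen_config.eos_token_id)            # 2
print(gen_config.pad_token_id)            # 1
```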
model.SRC
ADDED
@@ -0,0 +1,3 @@
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ac9257c8e76b8b607705b959cc3d075656ea33032f7a974e467b8941df6e98d4
|
3 |
+
size 3256903
|
model.TGT
ADDED
@@ -0,0 +1,3 @@
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3cedc5cbcc740369b76201942a0f096fec7287fee039b55bdb956f301235b914
|
3 |
+
size 759425
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d315f8e48684a55ac46df5e6e3664b10614ac41705520346c34493433f8fbfae
|
3 |
+
size 2054257440
|
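model.SRC, model.TGT and model.safetensors are stored as Git LFS pointer files: the three lines in each diff above are the pointer format (spec version, sha256 oid, byte size), and the actual binaries are fetched by LFS at clone or download time. A downloaded copy can be checked against the recorded oid and size; the snippet below is a generic sketch, not a utility shipped in this repository, using the values copied from the model.safetensors pointer above.

```python
# Generic sketch: verify a downloaded LFS object against its pointer metadata.
import hashlib
import os

def verify_lfs_object(path: str, expected_sha256: str, expected_size: int) -> bool:
    """Return True if the file's size and sha256 digest match the LFS pointer."""
    if os.path.getsize(path) != expected_size:
        return False
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_sha256

print(verify_lfs_object(
    "model.safetensors",
    "d315f8e48684a55ac46df5e6e3664b10614ac41705520346c34493433f8fbfae",
    2054257440,
))
```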
modeling_indictrans.py
ADDED
@@ -0,0 +1,1801 @@
|
1 |
+
# coding=utf-8
|
2 |
+
# Copyright 2023 The IndicTrans2 Authors and AI4Bharat team. All rights reserved.
|
3 |
+
#
|
4 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5 |
+
# you may not use this file except in compliance with the License.
|
6 |
+
# You may obtain a copy of the License at
|
7 |
+
#
|
8 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9 |
+
#
|
10 |
+
# Unless required by applicable law or agreed to in writing, software
|
11 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13 |
+
# See the License for the specific language governing permissions and
|
14 |
+
# limitations under the License.
|
15 |
+
""" PyTorch IndicTrans model."""
|
16 |
+
|
17 |
+
|
18 |
+
import math
|
19 |
+
from typing import List, Optional, Tuple, Union
|
20 |
+
|
21 |
+
import torch
|
22 |
+
import torch.nn as nn
|
23 |
+
from torch.nn import functional as F
|
24 |
+
|
25 |
+
from transformers.activations import ACT2FN
|
26 |
+
|
27 |
+
from transformers.modeling_attn_mask_utils import (
|
28 |
+
_prepare_4d_attention_mask,
|
29 |
+
_prepare_4d_attention_mask_for_sdpa,
|
30 |
+
_prepare_4d_causal_attention_mask,
|
31 |
+
_prepare_4d_causal_attention_mask_for_sdpa,
|
32 |
+
)
|
33 |
+
|
34 |
+
from transformers.integrations.deepspeed import is_deepspeed_zero3_enabled
|
35 |
+
from transformers.modeling_outputs import (
|
36 |
+
BaseModelOutput,
|
37 |
+
BaseModelOutputWithPastAndCrossAttentions,
|
38 |
+
Seq2SeqLMOutput,
|
39 |
+
Seq2SeqModelOutput
|
40 |
+
)
|
41 |
+
|
42 |
+
from transformers.utils import (
|
43 |
+
logging,
|
44 |
+
is_flash_attn_2_available,
|
45 |
+
is_flash_attn_greater_or_equal_2_10,
|
46 |
+
)
|
47 |
+
|
48 |
+
from transformers.modeling_utils import PreTrainedModel
|
49 |
+
|
50 |
+
from .configuration_indictrans import IndicTransConfig
|
51 |
+
|
52 |
+
|
53 |
+
logger = logging.get_logger(__name__)
|
54 |
+
|
55 |
+
INDICTRANS_PRETRAINED_MODEL_ARCHIVE_LIST = [""]
|
56 |
+
|
57 |
+
try:
|
58 |
+
if is_flash_attn_2_available():
|
59 |
+
from flash_attn import flash_attn_func, flash_attn_varlen_func
|
60 |
+
from flash_attn.bert_padding import index_first_axis, pad_input, unpad_input # noqa
|
61 |
+
except:
|
62 |
+
pass
|
63 |
+
|
64 |
+
|
65 |
+
# Copied from transformers.models.llama.modeling_llama._get_unpad_data
|
66 |
+
def _get_unpad_data(attention_mask):
|
67 |
+
seqlens_in_batch = attention_mask.sum(dim=-1, dtype=torch.int32)
|
68 |
+
indices = torch.nonzero(attention_mask.flatten(), as_tuple=False).flatten()
|
69 |
+
max_seqlen_in_batch = seqlens_in_batch.max().item()
|
70 |
+
cu_seqlens = F.pad(torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.int32), (1, 0))
|
71 |
+
return (
|
72 |
+
indices,
|
73 |
+
cu_seqlens,
|
74 |
+
max_seqlen_in_batch,
|
75 |
+
)
|
76 |
+
|
77 |
+
|
78 |
+
# Copied from transformers.models.bart.modeling_bart.shift_tokens_right
|
79 |
+
def shift_tokens_right(
|
80 |
+
input_ids: torch.Tensor, pad_token_id: int, decoder_start_token_id: int
|
81 |
+
):
|
82 |
+
"""
|
83 |
+
Shift input ids one token to the right.
|
84 |
+
"""
|
85 |
+
shifted_input_ids = input_ids.new_zeros(input_ids.shape)
|
86 |
+
shifted_input_ids[:, 1:] = input_ids[:, :-1].clone()
|
87 |
+
shifted_input_ids[:, 0] = decoder_start_token_id
|
88 |
+
|
89 |
+
if pad_token_id is None:
|
90 |
+
raise ValueError("self.model.config.pad_token_id has to be defined.")
|
91 |
+
# replace possible -100 values in labels by `pad_token_id`
|
92 |
+
shifted_input_ids.masked_fill_(shifted_input_ids == -100, pad_token_id)
|
93 |
+
|
94 |
+
return shifted_input_ids
|
95 |
+
|
96 |
+
|
97 |
+
def create_position_ids_from_input_ids(
|
98 |
+
input_ids, padding_idx, past_key_values_length=0
|
99 |
+
):
|
100 |
+
"""
|
101 |
+
Replace non-padding symbols with their position numbers. Position numbers begin at padding_idx+1. Padding symbols
|
102 |
+
are ignored. This is modified from fairseq's `utils.make_positions`.
|
103 |
+
"""
|
104 |
+
# The series of casts and type-conversions here are carefully balanced to both work with ONNX export and XLA.
|
105 |
+
mask = input_ids.ne(padding_idx).int()
|
106 |
+
incremental_indices = (
|
107 |
+
torch.cumsum(mask, dim=1).type_as(mask) + past_key_values_length
|
108 |
+
) * mask
|
109 |
+
return incremental_indices.long() + padding_idx
|
110 |
+
|
111 |
+
|
112 |
+
# Copied from transformers.models.m2m_100.modeling_m2m_100.M2M100SinusoidalPositionalEmbedding->IndicTrans
|
113 |
+
class IndicTransSinusoidalPositionalEmbedding(nn.Module):
|
114 |
+
"""This module produces sinusoidal positional embeddings of any length."""
|
115 |
+
|
116 |
+
def __init__(
|
117 |
+
self, num_positions: int, embedding_dim: int, padding_idx: Optional[int] = None
|
118 |
+
):
|
119 |
+
super().__init__()
|
120 |
+
self.offset = 2
|
121 |
+
self.embedding_dim = embedding_dim
|
122 |
+
self.padding_idx = padding_idx
|
123 |
+
self.make_weights(num_positions + self.offset, embedding_dim, padding_idx)
|
124 |
+
|
125 |
+
def make_weights(
|
126 |
+
self, num_embeddings: int, embedding_dim: int, padding_idx: Optional[int] = None
|
127 |
+
):
|
128 |
+
emb_weights = self.get_embedding(num_embeddings, embedding_dim, padding_idx)
|
129 |
+
if hasattr(self, "weights"):
|
130 |
+
# in forward put the weights on the correct dtype and device of the param
|
131 |
+
emb_weights = emb_weights.to(
|
132 |
+
dtype=self.weights.dtype, device=self.weights.device
|
133 |
+
)
|
134 |
+
|
135 |
+
self.register_buffer("weights", emb_weights, persistent=False)
|
136 |
+
|
137 |
+
@staticmethod
|
138 |
+
def get_embedding(
|
139 |
+
num_embeddings: int, embedding_dim: int, padding_idx: Optional[int] = None
|
140 |
+
):
|
141 |
+
"""
|
142 |
+
Build sinusoidal embeddings.
|
143 |
+
|
144 |
+
This matches the implementation in tensor2tensor, but differs slightly from the description in Section 3.5 of
|
145 |
+
"Attention Is All You Need".
|
146 |
+
"""
|
147 |
+
half_dim = embedding_dim // 2
|
148 |
+
emb = math.log(10000) / (half_dim - 1)
|
149 |
+
emb = torch.exp(torch.arange(half_dim, dtype=torch.float) * -emb)
|
150 |
+
emb = torch.arange(num_embeddings, dtype=torch.float).unsqueeze(
|
151 |
+
1
|
152 |
+
) * emb.unsqueeze(0)
|
153 |
+
emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=1).view(
|
154 |
+
num_embeddings, -1
|
155 |
+
)
|
156 |
+
if embedding_dim % 2 == 1:
|
157 |
+
# zero pad
|
158 |
+
emb = torch.cat([emb, torch.zeros(num_embeddings, 1)], dim=1)
|
159 |
+
if padding_idx is not None:
|
160 |
+
emb[padding_idx, :] = 0
|
161 |
+
|
162 |
+
return emb.to(torch.get_default_dtype())
|
163 |
+
|
164 |
+
@torch.no_grad()
|
165 |
+
def forward(
|
166 |
+
self,
|
167 |
+
input_ids: torch.Tensor = None,
|
168 |
+
inputs_embeds: torch.Tensor = None,
|
169 |
+
past_key_values_length: int = 0,
|
170 |
+
):
|
171 |
+
if input_ids is not None:
|
172 |
+
bsz, seq_len = input_ids.size()
|
173 |
+
# Create the position ids from the input token ids. Any padded tokens remain padded.
|
174 |
+
position_ids = create_position_ids_from_input_ids(
|
175 |
+
input_ids, self.padding_idx, past_key_values_length
|
176 |
+
).to(input_ids.device)
|
177 |
+
else:
|
178 |
+
bsz, seq_len = inputs_embeds.size()[:-1]
|
179 |
+
position_ids = self.create_position_ids_from_inputs_embeds(
|
180 |
+
inputs_embeds, past_key_values_length
|
181 |
+
)
|
182 |
+
|
183 |
+
# expand embeddings if needed
|
184 |
+
max_pos = self.padding_idx + 1 + seq_len + past_key_values_length
|
185 |
+
if max_pos > self.weights.size(0):
|
186 |
+
self.make_weights(
|
187 |
+
max_pos + self.offset, self.embedding_dim, self.padding_idx
|
188 |
+
)
|
189 |
+
|
190 |
+
return (
|
191 |
+
self.weights.index_select(0, position_ids.view(-1))
|
192 |
+
.view(bsz, seq_len, self.weights.shape[-1])
|
193 |
+
.detach()
|
194 |
+
)
|
195 |
+
|
196 |
+
def create_position_ids_from_inputs_embeds(
|
197 |
+
self, inputs_embeds, past_key_values_length
|
198 |
+
):
|
199 |
+
"""
|
200 |
+
We are provided embeddings directly. We cannot infer which are padded so just generate sequential position ids.
|
201 |
+
|
202 |
+
Args:
|
203 |
+
inputs_embeds: torch.Tensor
|
204 |
+
|
205 |
+
Returns: torch.Tensor
|
206 |
+
"""
|
207 |
+
input_shape = inputs_embeds.size()[:-1]
|
208 |
+
sequence_length = input_shape[1]
|
209 |
+
|
210 |
+
position_ids = torch.arange(
|
211 |
+
self.padding_idx + 1,
|
212 |
+
sequence_length + self.padding_idx + 1,
|
213 |
+
dtype=torch.long,
|
214 |
+
device=inputs_embeds.device,
|
215 |
+
)
|
216 |
+
return (
|
217 |
+
position_ids.unsqueeze(0).expand(input_shape).contiguous()
|
218 |
+
+ past_key_values_length
|
219 |
+
)
|
220 |
+
|
221 |
+
|
222 |
+
# Copied from transformers.models.bart.modeling_bart.BartAttention with Bart->IndicTrans
class IndicTransAttention(nn.Module):
    """Multi-headed attention from 'Attention Is All You Need' paper"""

    def __init__(
        self,
        embed_dim: int,
        num_heads: int,
        dropout: float = 0.0,
        is_decoder: bool = False,
        bias: bool = True,
        is_causal: bool = False,
        config: Optional[IndicTransConfig] = None,
    ):
        super().__init__()
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.dropout = dropout
        self.head_dim = embed_dim // num_heads
        self.config = config

        if (self.head_dim * num_heads) != self.embed_dim:
            raise ValueError(
                f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim}"
                f" and `num_heads`: {num_heads})."
            )
        self.scaling = self.head_dim**-0.5
        self.is_decoder = is_decoder
        self.is_causal = is_causal

        self.k_proj = nn.Linear(embed_dim, embed_dim, bias=bias)
        self.v_proj = nn.Linear(embed_dim, embed_dim, bias=bias)
        self.q_proj = nn.Linear(embed_dim, embed_dim, bias=bias)
        self.out_proj = nn.Linear(embed_dim, embed_dim, bias=bias)

    def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
        return (
            tensor.view(bsz, seq_len, self.num_heads, self.head_dim)
            .transpose(1, 2)
            .contiguous()
        )

    def forward(
        self,
        hidden_states: torch.Tensor,
        key_value_states: Optional[torch.Tensor] = None,
        past_key_value: Optional[Tuple[torch.Tensor]] = None,
        attention_mask: Optional[torch.Tensor] = None,
        layer_head_mask: Optional[torch.Tensor] = None,
        output_attentions: bool = False,
    ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
        """Input shape: Batch x Time x Channel"""

        # if key_value_states are provided this layer is used as a cross-attention layer
        # for the decoder
        is_cross_attention = key_value_states is not None

        bsz, tgt_len, _ = hidden_states.size()

        # get query proj
        query_states = self.q_proj(hidden_states) * self.scaling
        # get key, value proj
        # `past_key_value[0].shape[2] == key_value_states.shape[1]`
        # is checking that the `sequence_length` of the `past_key_value` is the same as
        # the provided `key_value_states` to support prefix tuning
        if (
            is_cross_attention
            and past_key_value is not None
            and past_key_value[0].shape[2] == key_value_states.shape[1]
        ):
            # reuse k,v, cross_attentions
            key_states = past_key_value[0]
            value_states = past_key_value[1]
        elif is_cross_attention:
            # cross_attentions
            key_states = self._shape(self.k_proj(key_value_states), -1, bsz)
            value_states = self._shape(self.v_proj(key_value_states), -1, bsz)
        elif past_key_value is not None:
            # reuse k, v, self_attention
            key_states = self._shape(self.k_proj(hidden_states), -1, bsz)
            value_states = self._shape(self.v_proj(hidden_states), -1, bsz)
            key_states = torch.cat([past_key_value[0], key_states], dim=2)
            value_states = torch.cat([past_key_value[1], value_states], dim=2)
        else:
            # self_attention
            key_states = self._shape(self.k_proj(hidden_states), -1, bsz)
            value_states = self._shape(self.v_proj(hidden_states), -1, bsz)

        if self.is_decoder:
            # if cross_attention save Tuple(torch.Tensor, torch.Tensor) of all cross attention key/value_states.
            # Further calls to cross_attention layer can then reuse all cross-attention
            # key/value_states (first "if" case)
            # if uni-directional self-attention (decoder) save Tuple(torch.Tensor, torch.Tensor) of
            # all previous decoder key/value_states. Further calls to uni-directional self-attention
            # can concat previous decoder key/value_states to current projected key/value_states (third "elif" case)
            # if encoder bi-directional self-attention `past_key_value` is always `None`
            past_key_value = (key_states, value_states)

        proj_shape = (bsz * self.num_heads, -1, self.head_dim)
        query_states = self._shape(query_states, tgt_len, bsz).view(*proj_shape)
        key_states = key_states.reshape(*proj_shape)
        value_states = value_states.reshape(*proj_shape)

        src_len = key_states.size(1)
        attn_weights = torch.bmm(query_states, key_states.transpose(1, 2))

        if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len):
            raise ValueError(
                f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is"
                f" {attn_weights.size()}"
            )

        if attention_mask is not None:
            if attention_mask.size() != (bsz, 1, tgt_len, src_len):
                raise ValueError(
                    f"Attention mask should be of size {(bsz, 1, tgt_len, src_len)}, but is {attention_mask.size()}"
                )
            attn_weights = (
                attn_weights.view(bsz, self.num_heads, tgt_len, src_len)
                + attention_mask
            )
            attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len)

        attn_weights = F.softmax(attn_weights, dim=-1)

        if layer_head_mask is not None:
            if layer_head_mask.size() != (self.num_heads,):
                raise ValueError(
                    f"Head mask for a single layer should be of size {(self.num_heads,)}, but is"
                    f" {layer_head_mask.size()}"
                )
            attn_weights = layer_head_mask.view(1, -1, 1, 1) * attn_weights.view(
                bsz, self.num_heads, tgt_len, src_len
            )
            attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len)

        if output_attentions:
            # this operation is a bit awkward, but it's required to
            # make sure that attn_weights keeps its gradient.
            # In order to do so, attn_weights have to be reshaped
            # twice and have to be reused in the following
            attn_weights_reshaped = attn_weights.view(
                bsz, self.num_heads, tgt_len, src_len
            )
            attn_weights = attn_weights_reshaped.view(
                bsz * self.num_heads, tgt_len, src_len
            )
        else:
            attn_weights_reshaped = None

        attn_probs = F.dropout(attn_weights, p=self.dropout, training=self.training)

        attn_output = torch.bmm(attn_probs, value_states)

        if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim):
            raise ValueError(
                f"`attn_output` should be of size {(bsz * self.num_heads, tgt_len, self.head_dim)}, but is"
                f" {attn_output.size()}"
            )

        attn_output = attn_output.view(bsz, self.num_heads, tgt_len, self.head_dim)
        attn_output = attn_output.transpose(1, 2)

        # Use the `embed_dim` from the config (stored in the class) rather than `hidden_state` because `attn_output` can be
        # partitioned across GPUs when using tensor-parallelism.
        attn_output = attn_output.reshape(bsz, tgt_len, self.embed_dim)

        attn_output = self.out_proj(attn_output)

        return attn_output, attn_weights_reshaped, past_key_value

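# Minimal usage sketch for the eager attention above (hypothetical sizes, not taken
# from the shipped config.json):
#
#   attn = IndicTransAttention(embed_dim=1024, num_heads=16)
#   x = torch.randn(2, 7, 1024)  # (batch, tgt_len, embed_dim)
#   out, weights, cache = attn(x, output_attentions=True)
#   # out: (2, 7, 1024); weights: (2, 16, 7, 7); cache is None since is_decoder=False
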
class IndicTransFlashAttention2(IndicTransAttention):
|
395 |
+
"""
|
396 |
+
IndicTrans flash attention module. This module inherits from `IndicTransAttention`, as the weights of the module stay
|
397 |
+
untouched. The only required change would be on the forward pass where it needs to correctly call the public API of
|
398 |
+
flash attention and deal with padding tokens in case the input contains any of them.
|
399 |
+
"""
|
400 |
+
|
401 |
+
# Copied from transformers.models.llama.modeling_llama.LlamaFlashAttention2.__init__
|
402 |
+
def __init__(self, *args, **kwargs):
|
403 |
+
super().__init__(*args, **kwargs)
|
404 |
+
|
405 |
+
# TODO: Should be removed once Flash Attention for RoCm is bumped to 2.1.
|
406 |
+
# flash_attn<2.1 generates a top-left aligned causal mask, while what is needed here is a bottom-right alignment, which was made the default for flash_attn>=2.1. This attribute is used to handle this difference. Reference: https://github.com/Dao-AILab/flash-attention/releases/tag/v2.1.0.
|
407 |
+
# Beware that with flash_attn<2.1, using q_seqlen != k_seqlen (except for the case q_seqlen == 1) produces a wrong mask (top-left).
|
408 |
+
self._flash_attn_uses_top_left_mask = not is_flash_attn_greater_or_equal_2_10()
|
409 |
+
|
410 |
+
def _reshape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
|
411 |
+
return tensor.view(bsz, seq_len, self.num_heads, self.head_dim)
|
412 |
+
|
413 |
+
def forward(
|
414 |
+
self,
|
415 |
+
hidden_states: torch.Tensor,
|
416 |
+
key_value_states: Optional[torch.Tensor] = None,
|
417 |
+
past_key_value: Optional[Tuple[torch.Tensor]] = None,
|
418 |
+
attention_mask: Optional[torch.Tensor] = None,
|
419 |
+
layer_head_mask: Optional[torch.Tensor] = None,
|
420 |
+
output_attentions: bool = False,
|
421 |
+
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
|
422 |
+
# IndicTransFlashAttention2 attention does not support output_attentions
|
423 |
+
if output_attentions:
|
424 |
+
raise ValueError("IndicTransFlashAttention2 attention does not support output_attentions")
|
425 |
+
|
426 |
+
# if key_value_states are provided this layer is used as a cross-attention layer
|
427 |
+
# for the decoder
|
428 |
+
is_cross_attention = key_value_states is not None
|
429 |
+
|
430 |
+
bsz, q_len, _ = hidden_states.size()
|
431 |
+
|
432 |
+
# get query proj
|
433 |
+
query_states = self._reshape(self.q_proj(hidden_states), -1, bsz)
|
434 |
+
# get key, value proj
|
435 |
+
# `past_key_value[0].shape[2] == key_value_states.shape[1]`
|
436 |
+
# is checking that the `sequence_length` of the `past_key_value` is the same as
|
437 |
+
# the provided `key_value_states` to support prefix tuning
|
438 |
+
if (
|
439 |
+
is_cross_attention
|
440 |
+
and past_key_value is not None
|
441 |
+
and past_key_value[0].shape[2] == key_value_states.shape[1]
|
442 |
+
):
|
443 |
+
# reuse k,v, cross_attentions
|
444 |
+
key_states = past_key_value[0].transpose(1, 2)
|
445 |
+
value_states = past_key_value[1].transpose(1, 2)
|
446 |
+
elif is_cross_attention:
|
447 |
+
# cross_attentions
|
448 |
+
key_states = self._reshape(self.k_proj(key_value_states), -1, bsz)
|
449 |
+
value_states = self._reshape(self.v_proj(key_value_states), -1, bsz)
|
450 |
+
elif past_key_value is not None:
|
451 |
+
# reuse k, v, self_attention
|
452 |
+
key_states = self._reshape(self.k_proj(hidden_states), -1, bsz)
|
453 |
+
value_states = self._reshape(self.v_proj(hidden_states), -1, bsz)
|
454 |
+
key_states = torch.cat([past_key_value[0].transpose(1, 2), key_states], dim=1)
|
455 |
+
value_states = torch.cat([past_key_value[1].transpose(1, 2), value_states], dim=1)
|
456 |
+
else:
|
457 |
+
# self_attention
|
458 |
+
key_states = self._reshape(self.k_proj(hidden_states), -1, bsz)
|
459 |
+
value_states = self._reshape(self.v_proj(hidden_states), -1, bsz)
|
460 |
+
|
461 |
+
if self.is_decoder:
|
462 |
+
# if cross_attention save Tuple(torch.Tensor, torch.Tensor) of all cross attention key/value_states.
|
463 |
+
# Further calls to cross_attention layer can then reuse all cross-attention
|
464 |
+
# key/value_states (first "if" case)
|
465 |
+
# if uni-directional self-attention (decoder) save Tuple(torch.Tensor, torch.Tensor) of
|
466 |
+
# all previous decoder key/value_states. Further calls to uni-directional self-attention
|
467 |
+
# can concat previous decoder key/value_states to current projected key/value_states (third "elif" case)
|
468 |
+
# if encoder bi-directional self-attention `past_key_value` is always `None`
|
469 |
+
past_key_value = (key_states.transpose(1, 2), value_states.transpose(1, 2))
|
470 |
+
|
471 |
+
kv_seq_len = key_states.shape[-2]
|
472 |
+
if past_key_value is not None:
|
473 |
+
kv_seq_len += past_key_value[0].shape[-2]
|
474 |
+
|
475 |
+
# In PEFT, usually we cast the layer norms in float32 for training stability reasons
|
476 |
+
# therefore the input hidden states get silently cast to float32. Hence, we need to
|
477 |
+
# cast them back in the correct dtype just to be sure everything works as expected.
|
478 |
+
# This might slow down training & inference, so it is recommended not to cast the LayerNorms
|
479 |
+
# in fp32. (LlamaRMSNorm handles it correctly)
|
480 |
+
|
481 |
+
input_dtype = query_states.dtype
|
482 |
+
if input_dtype == torch.float32:
|
483 |
+
if torch.is_autocast_enabled():
|
484 |
+
target_dtype = torch.get_autocast_gpu_dtype()
|
485 |
+
# Handle the case where the model is quantized
|
486 |
+
elif hasattr(self.config, "_pre_quantization_dtype"):
|
487 |
+
target_dtype = self.config._pre_quantization_dtype
|
488 |
+
else:
|
489 |
+
target_dtype = self.q_proj.weight.dtype
|
490 |
+
|
491 |
+
logger.warning_once(
|
492 |
+
f"The input hidden states seems to be silently casted in float32, this might be related to"
|
493 |
+
f" the fact you have upcasted embedding or layer norm layers in float32. We will cast back the input in"
|
494 |
+
f" {target_dtype}."
|
495 |
+
)
|
496 |
+
|
497 |
+
query_states = query_states.to(target_dtype)
|
498 |
+
key_states = key_states.to(target_dtype)
|
499 |
+
value_states = value_states.to(target_dtype)
|
500 |
+
|
501 |
+
attn_output = self._flash_attention_forward(
|
502 |
+
query_states, key_states, value_states, attention_mask, q_len, dropout=self.dropout
|
503 |
+
)
|
504 |
+
|
505 |
+
attn_output = attn_output.reshape(bsz, q_len, -1)
|
506 |
+
attn_output = self.out_proj(attn_output)
|
507 |
+
|
508 |
+
if not output_attentions:
|
509 |
+
attn_weights = None
|
510 |
+
|
511 |
+
return attn_output, attn_weights, past_key_value
|
512 |
+
|
513 |
+
# Copied from transformers.models.llama.modeling_llama.LlamaFlashAttention2._flash_attention_forward
|
514 |
+
def _flash_attention_forward(
|
515 |
+
self, query_states, key_states, value_states, attention_mask, query_length, dropout=0.0, softmax_scale=None
|
516 |
+
):
|
517 |
+
"""
|
518 |
+
Calls the forward method of Flash Attention - if the input hidden states contain at least one padding token
|
519 |
+
first unpad the input, then computes the attention scores and pad the final attention scores.
|
520 |
+
|
521 |
+
Args:
|
522 |
+
query_states (`torch.Tensor`):
|
523 |
+
Input query states to be passed to Flash Attention API
|
524 |
+
key_states (`torch.Tensor`):
|
525 |
+
Input key states to be passed to Flash Attention API
|
526 |
+
value_states (`torch.Tensor`):
|
527 |
+
Input value states to be passed to Flash Attention API
|
528 |
+
attention_mask (`torch.Tensor`):
|
529 |
+
The padding mask - corresponds to a tensor of size `(batch_size, seq_len)` where 0 stands for the
|
530 |
+
position of padding tokens and 1 for the position of non-padding tokens.
|
531 |
+
dropout (`float`):
|
532 |
+
Attention dropout
|
533 |
+
softmax_scale (`float`, *optional*):
|
534 |
+
The scaling of QK^T before applying softmax. Default to 1 / sqrt(head_dim)
|
535 |
+
"""
|
536 |
+
if not self._flash_attn_uses_top_left_mask:
|
537 |
+
causal = self.is_causal
|
538 |
+
else:
|
539 |
+
# TODO: Remove the `query_length != 1` check once Flash Attention for RoCm is bumped to 2.1. For details, please see the comment in LlamaFlashAttention2 __init__.
|
540 |
+
causal = self.is_causal and query_length != 1
|
541 |
+
|
542 |
+
# Contains at least one padding token in the sequence
|
543 |
+
if attention_mask is not None:
|
544 |
+
batch_size = query_states.shape[0]
|
545 |
+
query_states, key_states, value_states, indices_q, cu_seq_lens, max_seq_lens = self._upad_input(
|
546 |
+
query_states, key_states, value_states, attention_mask, query_length
|
547 |
+
)
|
548 |
+
|
549 |
+
cu_seqlens_q, cu_seqlens_k = cu_seq_lens
|
550 |
+
max_seqlen_in_batch_q, max_seqlen_in_batch_k = max_seq_lens
|
551 |
+
|
552 |
+
attn_output_unpad = flash_attn_varlen_func(
|
553 |
+
query_states,
|
554 |
+
key_states,
|
555 |
+
value_states,
|
556 |
+
cu_seqlens_q=cu_seqlens_q,
|
557 |
+
cu_seqlens_k=cu_seqlens_k,
|
558 |
+
max_seqlen_q=max_seqlen_in_batch_q,
|
559 |
+
max_seqlen_k=max_seqlen_in_batch_k,
|
560 |
+
dropout_p=dropout,
|
561 |
+
softmax_scale=softmax_scale,
|
562 |
+
causal=causal,
|
563 |
+
)
|
564 |
+
|
565 |
+
attn_output = pad_input(attn_output_unpad, indices_q, batch_size, query_length)
|
566 |
+
else:
|
567 |
+
attn_output = flash_attn_func(
|
568 |
+
query_states, key_states, value_states, dropout, softmax_scale=softmax_scale, causal=causal
|
569 |
+
)
|
570 |
+
|
571 |
+
return attn_output
|
572 |
+
|
573 |
+
# Copied from transformers.models.llama.modeling_llama.LlamaFlashAttention2._upad_input
|
574 |
+
def _upad_input(self, query_layer, key_layer, value_layer, attention_mask, query_length):
|
575 |
+
indices_k, cu_seqlens_k, max_seqlen_in_batch_k = _get_unpad_data(attention_mask)
|
576 |
+
batch_size, kv_seq_len, num_key_value_heads, head_dim = key_layer.shape
|
577 |
+
|
578 |
+
key_layer = index_first_axis(
|
579 |
+
key_layer.reshape(batch_size * kv_seq_len, num_key_value_heads, head_dim), indices_k
|
580 |
+
)
|
581 |
+
value_layer = index_first_axis(
|
582 |
+
value_layer.reshape(batch_size * kv_seq_len, num_key_value_heads, head_dim), indices_k
|
583 |
+
)
|
584 |
+
if query_length == kv_seq_len:
|
585 |
+
query_layer = index_first_axis(
|
586 |
+
query_layer.reshape(batch_size * kv_seq_len, self.num_heads, head_dim), indices_k
|
587 |
+
)
|
588 |
+
cu_seqlens_q = cu_seqlens_k
|
589 |
+
max_seqlen_in_batch_q = max_seqlen_in_batch_k
|
590 |
+
indices_q = indices_k
|
591 |
+
elif query_length == 1:
|
592 |
+
max_seqlen_in_batch_q = 1
|
593 |
+
cu_seqlens_q = torch.arange(
|
594 |
+
batch_size + 1, dtype=torch.int32, device=query_layer.device
|
595 |
+
) # There is a memcpy here, that is very bad.
|
596 |
+
indices_q = cu_seqlens_q[:-1]
|
597 |
+
query_layer = query_layer.squeeze(1)
|
598 |
+
else:
|
599 |
+
# The -q_len: slice assumes left padding.
|
600 |
+
attention_mask = attention_mask[:, -query_length:]
|
601 |
+
query_layer, indices_q, cu_seqlens_q, max_seqlen_in_batch_q = unpad_input(query_layer, attention_mask)
|
602 |
+
|
603 |
+
return (
|
604 |
+
query_layer,
|
605 |
+
key_layer,
|
606 |
+
value_layer,
|
607 |
+
indices_q,
|
608 |
+
(cu_seqlens_q, cu_seqlens_k),
|
609 |
+
(max_seqlen_in_batch_q, max_seqlen_in_batch_k),
|
610 |
+
)
|
611 |
+
|
612 |
+
|
613 |
+
class IndicTransSdpaAttention(IndicTransAttention):
|
614 |
+
def forward(
|
615 |
+
self,
|
616 |
+
hidden_states: torch.Tensor,
|
617 |
+
key_value_states: Optional[torch.Tensor] = None,
|
618 |
+
past_key_value: Optional[Tuple[torch.Tensor]] = None,
|
619 |
+
attention_mask: Optional[torch.Tensor] = None,
|
620 |
+
layer_head_mask: Optional[torch.Tensor] = None,
|
621 |
+
output_attentions: bool = False,
|
622 |
+
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
|
623 |
+
"""Input shape: Batch x Time x Channel"""
|
624 |
+
if output_attentions or layer_head_mask is not None:
|
625 |
+
# TODO: Improve this warning with e.g. `model.config._attn_implementation = "manual"` once this is implemented.
|
626 |
+
logger.warning_once(
|
627 |
+
"IndicTransModel is using IndicTransSdpaAttention, but `torch.nn.functional.scaled_dot_product_attention` does not support `output_attentions=True` or `layer_head_mask` not None. Falling back to the manual attention"
|
628 |
+
' implementation, but specifying the manual implementation will be required from Transformers version v5.0.0 onwards. This warning can be removed using the argument `attn_implementation="eager"` when loading the model.'
|
629 |
+
)
|
630 |
+
return super().forward(
|
631 |
+
hidden_states,
|
632 |
+
key_value_states=key_value_states,
|
633 |
+
past_key_value=past_key_value,
|
634 |
+
attention_mask=attention_mask,
|
635 |
+
layer_head_mask=layer_head_mask,
|
636 |
+
output_attentions=output_attentions,
|
637 |
+
)
|
638 |
+
|
639 |
+
# if key_value_states are provided this layer is used as a cross-attention layer
|
640 |
+
# for the decoder
|
641 |
+
is_cross_attention = key_value_states is not None
|
642 |
+
|
643 |
+
bsz, tgt_len, _ = hidden_states.size()
|
644 |
+
|
645 |
+
# get query proj
|
646 |
+
query_states = self.q_proj(hidden_states)
|
647 |
+
# get key, value proj
|
648 |
+
# `past_key_value[0].shape[2] == key_value_states.shape[1]`
|
649 |
+
# is checking that the `sequence_length` of the `past_key_value` is the same as
|
650 |
+
# the provided `key_value_states` to support prefix tuning
|
651 |
+
if (
|
652 |
+
is_cross_attention
|
653 |
+
and past_key_value is not None
|
654 |
+
and past_key_value[0].shape[2] == key_value_states.shape[1]
|
655 |
+
):
|
656 |
+
# reuse k,v, cross_attentions
|
657 |
+
key_states = past_key_value[0]
|
658 |
+
value_states = past_key_value[1]
|
659 |
+
elif is_cross_attention:
|
660 |
+
# cross_attentions
|
661 |
+
key_states = self._shape(self.k_proj(key_value_states), -1, bsz)
|
662 |
+
value_states = self._shape(self.v_proj(key_value_states), -1, bsz)
|
663 |
+
elif past_key_value is not None:
|
664 |
+
# reuse k, v, self_attention
|
665 |
+
key_states = self._shape(self.k_proj(hidden_states), -1, bsz)
|
666 |
+
value_states = self._shape(self.v_proj(hidden_states), -1, bsz)
|
667 |
+
key_states = torch.cat([past_key_value[0], key_states], dim=2)
|
668 |
+
value_states = torch.cat([past_key_value[1], value_states], dim=2)
|
669 |
+
else:
|
670 |
+
# self_attention
|
671 |
+
key_states = self._shape(self.k_proj(hidden_states), -1, bsz)
|
672 |
+
value_states = self._shape(self.v_proj(hidden_states), -1, bsz)
|
673 |
+
|
674 |
+
if self.is_decoder:
|
675 |
+
# if cross_attention save Tuple(torch.Tensor, torch.Tensor) of all cross attention key/value_states.
|
676 |
+
# Further calls to cross_attention layer can then reuse all cross-attention
|
677 |
+
# key/value_states (first "if" case)
|
678 |
+
# if uni-directional self-attention (decoder) save Tuple(torch.Tensor, torch.Tensor) of
|
679 |
+
# all previous decoder key/value_states. Further calls to uni-directional self-attention
|
680 |
+
# can concat previous decoder key/value_states to current projected key/value_states (third "elif" case)
|
681 |
+
# if encoder bi-directional self-attention `past_key_value` is always `None`
|
682 |
+
past_key_value = (key_states, value_states)
|
683 |
+
|
684 |
+
query_states = self._shape(query_states, tgt_len, bsz)
|
685 |
+
|
686 |
+
# NOTE: SDPA with memory-efficient backend is currently (torch==2.1.2) bugged when using non-contiguous inputs and a custom attn_mask,
|
687 |
+
# but we are fine here as `_shape` do call `.contiguous()`. Reference: https://github.com/pytorch/pytorch/issues/112577
|
688 |
+
attn_output = F.scaled_dot_product_attention(
|
689 |
+
query_states,
|
690 |
+
key_states,
|
691 |
+
value_states,
|
692 |
+
attn_mask=attention_mask,
|
693 |
+
dropout_p=self.dropout if self.training else 0.0,
|
694 |
+
# The tgt_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case tgt_len == 1.
|
695 |
+
is_causal=self.is_causal and attention_mask is None and tgt_len > 1,
|
696 |
+
)
|
697 |
+
|
698 |
+
if attn_output.size() != (bsz, self.num_heads, tgt_len, self.head_dim):
|
699 |
+
raise ValueError(
|
700 |
+
f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is"
|
701 |
+
f" {attn_output.size()}"
|
702 |
+
)
|
703 |
+
|
704 |
+
attn_output = attn_output.transpose(1, 2)
|
705 |
+
|
706 |
+
# Use the `embed_dim` from the config (stored in the class) rather than `hidden_state` because `attn_output` can be
|
707 |
+
# partitioned across GPUs when using tensor-parallelism.
|
708 |
+
attn_output = attn_output.reshape(bsz, tgt_len, self.embed_dim)
|
709 |
+
|
710 |
+
attn_output = self.out_proj(attn_output)
|
711 |
+
|
712 |
+
return attn_output, None, past_key_value
|
713 |
+
|
714 |
+
|
715 |
+
INDICTRANS_ATTENTION_CLASSES = {
    "eager": IndicTransAttention,
    "sdpa": IndicTransSdpaAttention,
    "flash_attention_2": IndicTransFlashAttention2,
}

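# The encoder/decoder layers below pick their attention module through
# `config._attn_implementation`; in recent transformers versions, passing e.g.
# `attn_implementation="flash_attention_2"` to `from_pretrained` (assuming flash-attn
# 2.x is installed) routes attention through IndicTransFlashAttention2, while "eager"
# keeps the reference IndicTransAttention path above.
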
# Copied from transformers.models.mbart.modeling_mbart.MBartEncoderLayer with MBart->IndicTrans
class IndicTransEncoderLayer(nn.Module):
    def __init__(self, config: IndicTransConfig):
        super().__init__()
        self.embed_dim = config.encoder_embed_dim
        self.self_attn = INDICTRANS_ATTENTION_CLASSES[config._attn_implementation](
            embed_dim=self.embed_dim,
            num_heads=config.encoder_attention_heads,
            dropout=config.attention_dropout,
            config=config,
        )
        self.self_attn_layer_norm = nn.LayerNorm(self.embed_dim)
        self.dropout = config.dropout
        self.activation_fn = ACT2FN[config.activation_function]
        self.activation_dropout = config.activation_dropout
        self.fc1 = nn.Linear(self.embed_dim, config.encoder_ffn_dim)
        self.fc2 = nn.Linear(config.encoder_ffn_dim, self.embed_dim)
        self.final_layer_norm = nn.LayerNorm(self.embed_dim)
        self.normalize_before = config.encoder_normalize_before

    def forward(
        self,
        hidden_states: torch.Tensor,
        attention_mask: torch.Tensor,
        layer_head_mask: torch.Tensor,
        output_attentions: bool = False,
    ) -> torch.Tensor:
        """
        Args:
            hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
            attention_mask (`torch.FloatTensor`): attention mask of size
                `(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
            layer_head_mask (`torch.FloatTensor`): mask for attention heads in a given layer of size
                `(encoder_attention_heads,)`.
            output_attentions (`bool`, *optional*):
                Whether or not to return the attentions tensors of all attention layers. See `attentions` under
                returned tensors for more detail.
        """
        residual = hidden_states
        if self.normalize_before:
            hidden_states = self.self_attn_layer_norm(hidden_states)
        hidden_states, attn_weights, _ = self.self_attn(
            hidden_states=hidden_states,
            attention_mask=attention_mask,
            layer_head_mask=layer_head_mask,
            output_attentions=output_attentions,
        )
        hidden_states = F.dropout(hidden_states, p=self.dropout, training=self.training)
        hidden_states = residual + hidden_states
        if not self.normalize_before:
            hidden_states = self.self_attn_layer_norm(hidden_states)

        residual = hidden_states
        if self.normalize_before:
            hidden_states = self.final_layer_norm(hidden_states)
        hidden_states = self.activation_fn(self.fc1(hidden_states))
        hidden_states = F.dropout(
            hidden_states, p=self.activation_dropout, training=self.training
        )
        hidden_states = self.fc2(hidden_states)
        hidden_states = F.dropout(hidden_states, p=self.dropout, training=self.training)
        hidden_states = residual + hidden_states
        if not self.normalize_before:
            hidden_states = self.final_layer_norm(hidden_states)

        if hidden_states.dtype == torch.float16 and (
            torch.isinf(hidden_states).any() or torch.isnan(hidden_states).any()
        ):
            clamp_value = torch.finfo(hidden_states.dtype).max - 1000
            hidden_states = torch.clamp(
                hidden_states, min=-clamp_value, max=clamp_value
            )

        outputs = (hidden_states,)

        if output_attentions:
            outputs += (attn_weights,)

        return outputs

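# Sketch of exercising a single encoder layer in isolation (illustrative only; `config`
# is assumed to be an IndicTransConfig instance, and the all-zero 4D additive mask means
# "no padding", matching the docstring above):
#
#   layer = IndicTransEncoderLayer(config)
#   hidden = torch.randn(2, 5, config.encoder_embed_dim)
#   mask = torch.zeros(2, 1, 5, 5)
#   (hidden_out,) = layer(hidden, attention_mask=mask, layer_head_mask=None)
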
# Copied from transformers.models.mbart.modeling_mbart.MBartDecoderLayer with MBart->IndicTrans
|
803 |
+
class IndicTransDecoderLayer(nn.Module):
|
804 |
+
def __init__(self, config: IndicTransConfig):
|
805 |
+
super().__init__()
|
806 |
+
self.embed_dim = config.decoder_embed_dim
|
807 |
+
|
808 |
+
self.self_attn = INDICTRANS_ATTENTION_CLASSES[config._attn_implementation](
|
809 |
+
embed_dim=self.embed_dim,
|
810 |
+
num_heads=config.decoder_attention_heads,
|
811 |
+
dropout=config.attention_dropout,
|
812 |
+
is_decoder=True,
|
813 |
+
is_causal=True,
|
814 |
+
config=config,
|
815 |
+
)
|
816 |
+
self.dropout = config.dropout
|
817 |
+
self.activation_fn = ACT2FN[config.activation_function]
|
818 |
+
self.activation_dropout = config.activation_dropout
|
819 |
+
|
820 |
+
self.self_attn_layer_norm = nn.LayerNorm(self.embed_dim)
|
821 |
+
self.encoder_attn = INDICTRANS_ATTENTION_CLASSES[config._attn_implementation](
|
822 |
+
self.embed_dim,
|
823 |
+
config.decoder_attention_heads,
|
824 |
+
dropout=config.attention_dropout,
|
825 |
+
is_decoder=True,
|
826 |
+
config=config,
|
827 |
+
)
|
828 |
+
self.encoder_attn_layer_norm = nn.LayerNorm(self.embed_dim)
|
829 |
+
self.fc1 = nn.Linear(self.embed_dim, config.decoder_ffn_dim)
|
830 |
+
self.fc2 = nn.Linear(config.decoder_ffn_dim, self.embed_dim)
|
831 |
+
self.final_layer_norm = nn.LayerNorm(self.embed_dim)
|
832 |
+
self.normalize_before = config.decoder_normalize_before
|
833 |
+
|
834 |
+
def forward(
|
835 |
+
self,
|
836 |
+
hidden_states: torch.Tensor,
|
837 |
+
attention_mask: Optional[torch.Tensor] = None,
|
838 |
+
encoder_hidden_states: Optional[torch.Tensor] = None,
|
839 |
+
encoder_attention_mask: Optional[torch.Tensor] = None,
|
840 |
+
layer_head_mask: Optional[torch.Tensor] = None,
|
841 |
+
cross_attn_layer_head_mask: Optional[torch.Tensor] = None,
|
842 |
+
past_key_value: Optional[Tuple[torch.Tensor]] = None,
|
843 |
+
output_attentions: Optional[bool] = False,
|
844 |
+
use_cache: Optional[bool] = True,
|
845 |
+
) -> torch.Tensor:
|
846 |
+
"""
|
847 |
+
Args:
|
848 |
+
hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
|
849 |
+
attention_mask (`torch.FloatTensor`): attention mask of size
|
850 |
+
`(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
|
851 |
+
encoder_hidden_states (`torch.FloatTensor`):
|
852 |
+
cross attention input to the layer of shape `(batch, seq_len, embed_dim)`
|
853 |
+
encoder_attention_mask (`torch.FloatTensor`): encoder attention mask of size
|
854 |
+
`(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
|
855 |
+
layer_head_mask (`torch.FloatTensor`): mask for attention heads in a given layer of size
|
856 |
+
`(encoder_attention_heads,)`.
|
857 |
+
cross_attn_layer_head_mask (`torch.FloatTensor`): mask for cross-attention heads in a given layer of
|
858 |
+
size `(decoder_attention_heads,)`.
|
859 |
+
past_key_value (`Tuple(torch.FloatTensor)`): cached past key and value projection states
|
860 |
+
output_attentions (`bool`, *optional*):
|
861 |
+
Whether or not to return the attentions tensors of all attention layers. See `attentions` under
|
862 |
+
returned tensors for more detail.
|
863 |
+
"""
|
864 |
+
residual = hidden_states
|
865 |
+
if self.normalize_before:
|
866 |
+
hidden_states = self.self_attn_layer_norm(hidden_states)
|
867 |
+
|
868 |
+
# Self Attention
|
869 |
+
# decoder uni-directional self-attention cached key/values tuple is at positions 1,2
|
870 |
+
self_attn_past_key_value = (
|
871 |
+
past_key_value[:2] if past_key_value is not None else None
|
872 |
+
)
|
873 |
+
# add present self-attn cache to positions 1,2 of present_key_value tuple
|
874 |
+
hidden_states, self_attn_weights, present_key_value = self.self_attn(
|
875 |
+
hidden_states=hidden_states,
|
876 |
+
past_key_value=self_attn_past_key_value,
|
877 |
+
attention_mask=attention_mask,
|
878 |
+
layer_head_mask=layer_head_mask,
|
879 |
+
output_attentions=output_attentions,
|
880 |
+
)
|
881 |
+
hidden_states = F.dropout(hidden_states, p=self.dropout, training=self.training)
|
882 |
+
hidden_states = residual + hidden_states
|
883 |
+
if not self.normalize_before:
|
884 |
+
hidden_states = self.self_attn_layer_norm(hidden_states)
|
885 |
+
|
886 |
+
# Cross-Attention Block
|
887 |
+
cross_attn_present_key_value = None
|
888 |
+
cross_attn_weights = None
|
889 |
+
if encoder_hidden_states is not None:
|
890 |
+
residual = hidden_states
|
891 |
+
if self.normalize_before:
|
892 |
+
hidden_states = self.encoder_attn_layer_norm(hidden_states)
|
893 |
+
|
894 |
+
# cross_attn cached key/values tuple is at positions 3,4 of present_key_value tuple
|
895 |
+
cross_attn_past_key_value = (
|
896 |
+
past_key_value[-2:] if past_key_value is not None else None
|
897 |
+
)
|
898 |
+
(
|
899 |
+
hidden_states,
|
900 |
+
cross_attn_weights,
|
901 |
+
cross_attn_present_key_value,
|
902 |
+
) = self.encoder_attn(
|
903 |
+
hidden_states=hidden_states,
|
904 |
+
key_value_states=encoder_hidden_states,
|
905 |
+
attention_mask=encoder_attention_mask,
|
906 |
+
layer_head_mask=cross_attn_layer_head_mask,
|
907 |
+
past_key_value=cross_attn_past_key_value,
|
908 |
+
output_attentions=output_attentions,
|
909 |
+
)
|
910 |
+
hidden_states = F.dropout(
|
911 |
+
hidden_states, p=self.dropout, training=self.training
|
912 |
+
)
|
913 |
+
hidden_states = residual + hidden_states
|
914 |
+
if not self.normalize_before:
|
915 |
+
hidden_states = self.encoder_attn_layer_norm(hidden_states)
|
916 |
+
|
917 |
+
# add cross-attn to positions 3,4 of present_key_value tuple
|
918 |
+
present_key_value = present_key_value + cross_attn_present_key_value
|
919 |
+
|
920 |
+
# Fully Connected
|
921 |
+
residual = hidden_states
|
922 |
+
if self.normalize_before:
|
923 |
+
hidden_states = self.final_layer_norm(hidden_states)
|
924 |
+
hidden_states = self.activation_fn(self.fc1(hidden_states))
|
925 |
+
hidden_states = F.dropout(
|
926 |
+
hidden_states, p=self.activation_dropout, training=self.training
|
927 |
+
)
|
928 |
+
hidden_states = self.fc2(hidden_states)
|
929 |
+
hidden_states = F.dropout(hidden_states, p=self.dropout, training=self.training)
|
930 |
+
hidden_states = residual + hidden_states
|
931 |
+
if not self.normalize_before:
|
932 |
+
hidden_states = self.final_layer_norm(hidden_states)
|
933 |
+
|
934 |
+
outputs = (hidden_states,)
|
935 |
+
|
936 |
+
if output_attentions:
|
937 |
+
outputs += (self_attn_weights, cross_attn_weights)
|
938 |
+
|
939 |
+
if use_cache:
|
940 |
+
outputs += (present_key_value,)
|
941 |
+
|
942 |
+
return outputs
|
943 |
+
|
944 |
+
|
945 |
+
# Copied from transformers.models.m2m_100.modeling_m2m_100.M2M100PretrainedModel->IndicTrans
class IndicTransPreTrainedModel(PreTrainedModel):
    config_class = IndicTransConfig
    base_model_prefix = "model"
    supports_gradient_checkpointing = True
    _no_split_modules = ["IndicTransAttention"]

    def _init_weights(self, module):
        std = self.config.init_std
        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=std)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=std)
            if module.padding_idx is not None:
                module.weight.data[module.padding_idx].zero_()

    def _set_gradient_checkpointing(self, module, value=False):
        if isinstance(module, (IndicTransDecoder, IndicTransEncoder)):
            module.gradient_checkpointing = value

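# For reference (derived from `_init_weights` above): Linear and Embedding weights are
# drawn from N(0, config.init_std), Linear biases are zeroed, and the embedding row at
# `padding_idx` is zeroed so padding tokens contribute a null vector.
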
# Copied from transformers.models.m2m_100.modeling_m2m_100.M2M100EncoderLayer->IndicTrans
|
969 |
+
class IndicTransEncoder(IndicTransPreTrainedModel):
|
970 |
+
"""
|
971 |
+
Transformer encoder consisting of *config.encoder_layers* self attention layers. Each layer is a
|
972 |
+
[`IndicTransEncoderLayer`].
|
973 |
+
|
974 |
+
Args:
|
975 |
+
config: IndicTransConfig
|
976 |
+
embed_tokens (nn.Embedding): output embedding
|
977 |
+
"""
|
978 |
+
|
979 |
+
def __init__(
|
980 |
+
self, config: IndicTransConfig, embed_tokens: Optional[nn.Embedding] = None
|
981 |
+
):
|
982 |
+
super().__init__(config)
|
983 |
+
|
984 |
+
self.dropout = config.dropout
|
985 |
+
self.layerdrop = config.encoder_layerdrop
|
986 |
+
|
987 |
+
embed_dim = config.encoder_embed_dim
|
988 |
+
self.padding_idx = config.pad_token_id
|
989 |
+
self.max_source_positions = config.max_source_positions
|
990 |
+
self.embed_scale = math.sqrt(embed_dim) if config.scale_embedding else 1.0
|
991 |
+
|
992 |
+
self.embed_tokens = nn.Embedding(
|
993 |
+
config.encoder_vocab_size, embed_dim, self.padding_idx
|
994 |
+
)
|
995 |
+
|
996 |
+
if embed_tokens is not None:
|
997 |
+
self.embed_tokens.weight = embed_tokens.weight
|
998 |
+
|
999 |
+
self.embed_positions = IndicTransSinusoidalPositionalEmbedding(
|
1000 |
+
config.max_source_positions,
|
1001 |
+
embed_dim,
|
1002 |
+
self.padding_idx,
|
1003 |
+
)
|
1004 |
+
self.layers = nn.ModuleList(
|
1005 |
+
[IndicTransEncoderLayer(config) for _ in range(config.encoder_layers)]
|
1006 |
+
)
|
1007 |
+
self.layer_norm = (
|
1008 |
+
nn.LayerNorm(embed_dim) if config.encoder_normalize_before else None
|
1009 |
+
)
|
1010 |
+
self.layernorm_embedding = (
|
1011 |
+
nn.LayerNorm(embed_dim) if config.layernorm_embedding else None
|
1012 |
+
)
|
1013 |
+
|
1014 |
+
self._use_flash_attention_2 = config._attn_implementation == "flash_attention_2"
|
1015 |
+
self._use_sdpa = config._attn_implementation == "sdpa"
|
1016 |
+
|
1017 |
+
self.gradient_checkpointing = False
|
1018 |
+
# Initialize weights and apply final processing
|
1019 |
+
self.post_init()
|
1020 |
+
|
1021 |
+
def forward(
|
1022 |
+
self,
|
1023 |
+
input_ids: Optional[torch.Tensor] = None,
|
1024 |
+
attention_mask: Optional[torch.Tensor] = None,
|
1025 |
+
head_mask: Optional[torch.Tensor] = None,
|
1026 |
+
inputs_embeds: Optional[torch.Tensor] = None,
|
1027 |
+
output_attentions: Optional[bool] = None,
|
1028 |
+
output_hidden_states: Optional[bool] = None,
|
1029 |
+
return_dict: Optional[bool] = None,
|
1030 |
+
):
|
1031 |
+
r"""
|
1032 |
+
Args:
|
1033 |
+
input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
|
1034 |
+
Indices of input sequence tokens in the vocabulary. Padding will be ignored by default should you
|
1035 |
+
provide it.
|
1036 |
+
|
1037 |
+
Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
|
1038 |
+
[`PreTrainedTokenizer.__call__`] for details.
|
1039 |
+
|
1040 |
+
[What are input IDs?](../glossary#input-ids)
|
1041 |
+
attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
|
1042 |
+
Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:
|
1043 |
+
|
1044 |
+
- 1 for tokens that are **not masked**,
|
1045 |
+
- 0 for tokens that are **masked**.
|
1046 |
+
|
1047 |
+
[What are attention masks?](../glossary#attention-mask)
|
1048 |
+
head_mask (`torch.Tensor` of shape `(encoder_layers, encoder_attention_heads)`, *optional*):
|
1049 |
+
Mask to nullify selected heads of the attention modules. Mask values selected in `[0, 1]`:
|
1050 |
+
|
1051 |
+
- 1 indicates the head is **not masked**,
|
1052 |
+
- 0 indicates the head is **masked**.
|
1053 |
+
|
1054 |
+
inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
|
1055 |
+
Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation.
|
1056 |
+
This is useful if you want more control over how to convert `input_ids` indices into associated vectors
|
1057 |
+
than the model's internal embedding lookup matrix.
|
1058 |
+
output_attentions (`bool`, *optional*):
|
1059 |
+
Whether or not to return the attentions tensors of all attention layers. See `attentions` under
|
1060 |
+
returned tensors for more detail.
|
1061 |
+
output_hidden_states (`bool`, *optional*):
|
1062 |
+
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors
|
1063 |
+
for more detail.
|
1064 |
+
return_dict (`bool`, *optional*):
|
1065 |
+
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
|
1066 |
+
"""
|
1067 |
+
output_attentions = (
|
1068 |
+
output_attentions
|
1069 |
+
if output_attentions is not None
|
1070 |
+
else self.config.output_attentions
|
1071 |
+
)
|
1072 |
+
output_hidden_states = (
|
1073 |
+
output_hidden_states
|
1074 |
+
if output_hidden_states is not None
|
1075 |
+
else self.config.output_hidden_states
|
1076 |
+
)
|
1077 |
+
return_dict = (
|
1078 |
+
return_dict if return_dict is not None else self.config.use_return_dict
|
1079 |
+
)
|
1080 |
+
|
1081 |
+
# retrieve input_ids and inputs_embeds
|
1082 |
+
if input_ids is not None and inputs_embeds is not None:
|
1083 |
+
raise ValueError(
|
1084 |
+
"You cannot specify both input_ids and inputs_embeds at the same time"
|
1085 |
+
)
|
1086 |
+
elif input_ids is not None:
|
1087 |
+
self.warn_if_padding_and_no_attention_mask(input_ids, attention_mask)
|
1088 |
+
input_shape = input_ids.size()
|
1089 |
+
input_ids = input_ids.view(-1, input_shape[-1])
|
1090 |
+
elif inputs_embeds is not None:
|
1091 |
+
input_shape = inputs_embeds.size()[:-1]
|
1092 |
+
else:
|
1093 |
+
raise ValueError("You have to specify either input_ids or inputs_embeds")
|
1094 |
+
|
1095 |
+
if inputs_embeds is None:
|
1096 |
+
inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale
|
1097 |
+
|
1098 |
+
embed_pos = self.embed_positions(input_ids, inputs_embeds)
|
1099 |
+
embed_pos = embed_pos.to(inputs_embeds.device)
|
1100 |
+
|
1101 |
+
hidden_states = inputs_embeds + embed_pos
|
1102 |
+
if self.layernorm_embedding is not None:
|
1103 |
+
hidden_states = self.layernorm_embedding(hidden_states)
|
1104 |
+
hidden_states = F.dropout(hidden_states, p=self.dropout, training=self.training)
|
1105 |
+
|
1106 |
+
if attention_mask is not None:
|
1107 |
+
if self._use_flash_attention_2:
|
1108 |
+
attention_mask = attention_mask if 0 in attention_mask else None
|
1109 |
+
elif self._use_sdpa and head_mask is None and not output_attentions:
|
1110 |
+
# output_attentions=True & head_mask can not be supported when using SDPA, fall back to
|
1111 |
+
# the manual implementation that requires a 4D causal mask in all cases.
|
1112 |
+
# [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
|
1113 |
+
attention_mask = _prepare_4d_attention_mask_for_sdpa(attention_mask, inputs_embeds.dtype)
|
1114 |
+
else:
|
1115 |
+
# [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
|
1116 |
+
attention_mask = _prepare_4d_attention_mask(attention_mask, inputs_embeds.dtype)
|
1117 |
+
|
1118 |
+
|
1119 |
+
encoder_states = () if output_hidden_states else None
|
1120 |
+
all_attentions = () if output_attentions else None
|
1121 |
+
|
1122 |
+
# check if head_mask has a correct number of layers specified if desired
|
1123 |
+
if head_mask is not None:
|
1124 |
+
if head_mask.size()[0] != len(self.layers):
|
1125 |
+
raise ValueError(
|
1126 |
+
f"The head_mask should be specified for {len(self.layers)} layers, but it is for"
|
1127 |
+
f" {head_mask.size()[0]}."
|
1128 |
+
)
|
1129 |
+
deepspeed_zero3_is_enabled = is_deepspeed_zero3_enabled()
|
1130 |
+
|
1131 |
+
for idx, encoder_layer in enumerate(self.layers):
|
1132 |
+
if output_hidden_states:
|
1133 |
+
encoder_states = encoder_states + (hidden_states,)
|
1134 |
+
|
1135 |
+
# add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
|
1136 |
+
dropout_probability = torch.rand([])
|
1137 |
+
|
1138 |
+
skip_the_layer = (
|
1139 |
+
True
|
1140 |
+
if self.training and (dropout_probability < self.layerdrop)
|
1141 |
+
else False
|
1142 |
+
)
|
1143 |
+
if not skip_the_layer or deepspeed_zero3_is_enabled:
|
1144 |
+
# under deepspeed zero3 all gpus must run in sync
|
1145 |
+
|
1146 |
+
if self.gradient_checkpointing and self.training:
|
1147 |
+
# create gradient checkpointing function
|
1148 |
+
def create_custom_forward(module):
|
1149 |
+
def custom_forward(*inputs):
|
1150 |
+
return module(*inputs, output_attentions)
|
1151 |
+
|
1152 |
+
return custom_forward
|
1153 |
+
|
1154 |
+
layer_outputs = torch.utils.checkpoint.checkpoint(
|
1155 |
+
create_custom_forward(encoder_layer),
|
1156 |
+
hidden_states,
|
1157 |
+
attention_mask,
|
1158 |
+
(head_mask[idx] if head_mask is not None else None),
|
1159 |
+
)
|
1160 |
+
else:
|
1161 |
+
layer_outputs = encoder_layer(
|
1162 |
+
hidden_states,
|
1163 |
+
attention_mask,
|
1164 |
+
layer_head_mask=(
|
1165 |
+
head_mask[idx] if head_mask is not None else None
|
1166 |
+
),
|
1167 |
+
output_attentions=output_attentions,
|
1168 |
+
)
|
1169 |
+
|
1170 |
+
hidden_states = layer_outputs[0]
|
1171 |
+
|
1172 |
+
if skip_the_layer:
|
1173 |
+
layer_outputs = (None, None)
|
1174 |
+
|
1175 |
+
if output_attentions:
|
1176 |
+
all_attentions = all_attentions + (layer_outputs[1],)
|
1177 |
+
|
1178 |
+
if self.layer_norm is not None:
|
1179 |
+
hidden_states = self.layer_norm(hidden_states)
|
1180 |
+
|
1181 |
+
if output_hidden_states:
|
1182 |
+
encoder_states = encoder_states + (hidden_states,)
|
1183 |
+
|
1184 |
+
if not return_dict:
|
1185 |
+
return tuple(
|
1186 |
+
v
|
1187 |
+
for v in [hidden_states, encoder_states, all_attentions]
|
1188 |
+
if v is not None
|
1189 |
+
)
|
1190 |
+
return BaseModelOutput(
|
1191 |
+
last_hidden_state=hidden_states,
|
1192 |
+
hidden_states=encoder_states,
|
1193 |
+
attentions=all_attentions,
|
1194 |
+
)
|
1195 |
+
|
1196 |
+
|
1197 |
+
# Copied from transformers.models.m2m_100.modeling_m2m_100.M2M100DecoderLayer->IndicTrans
|
1198 |
+
class IndicTransDecoder(IndicTransPreTrainedModel):
|
1199 |
+
"""
|
1200 |
+
Transformer decoder consisting of *config.decoder_layers* layers. Each layer is a [`IndicTransDecoderLayer`]
|
1201 |
+
|
1202 |
+
Args:
|
1203 |
+
config: IndicTransConfig
|
1204 |
+
embed_tokens (nn.Embedding): output embedding
|
1205 |
+
"""
|
1206 |
+
|
1207 |
+
def __init__(
|
1208 |
+
self, config: IndicTransConfig, embed_tokens: Optional[nn.Embedding] = None
|
1209 |
+
):
|
1210 |
+
super().__init__(config)
|
1211 |
+
self.dropout = config.dropout
|
1212 |
+
self.layerdrop = config.decoder_layerdrop
|
1213 |
+
|
1214 |
+
embed_dim = config.encoder_embed_dim
|
1215 |
+
self.padding_idx = config.pad_token_id
|
1216 |
+
self.max_target_positions = config.max_target_positions
|
1217 |
+
self.embed_scale = math.sqrt(embed_dim) if config.scale_embedding else 1.0
|
1218 |
+
|
1219 |
+
self.embed_tokens = nn.Embedding(
|
1220 |
+
config.decoder_vocab_size, embed_dim, self.padding_idx
|
1221 |
+
)
|
1222 |
+
|
1223 |
+
if embed_tokens is not None:
|
1224 |
+
self.embed_tokens.weight = embed_tokens.weight
|
1225 |
+
|
1226 |
+
self.embed_positions = IndicTransSinusoidalPositionalEmbedding(
|
1227 |
+
config.max_target_positions,
|
1228 |
+
embed_dim,
|
1229 |
+
self.padding_idx,
|
1230 |
+
)
|
1231 |
+
self.layers = nn.ModuleList(
|
1232 |
+
[IndicTransDecoderLayer(config) for _ in range(config.decoder_layers)]
|
1233 |
+
)
|
1234 |
+
self.layer_norm = (
|
1235 |
+
nn.LayerNorm(embed_dim) if config.decoder_normalize_before else None
|
1236 |
+
)
|
1237 |
+
self.layernorm_embedding = (
|
1238 |
+
nn.LayerNorm(embed_dim) if config.layernorm_embedding else None
|
1239 |
+
)
|
1240 |
+
|
1241 |
+
self._use_flash_attention_2 = config._attn_implementation == "flash_attention_2"
|
1242 |
+
self._use_sdpa = config._attn_implementation == "sdpa"
|
1243 |
+
|
1244 |
+
self.gradient_checkpointing = False
|
1245 |
+
# Initialize weights and apply final processing
|
1246 |
+
self.post_init()
|
1247 |
+
|
1248 |
+
def forward(
|
1249 |
+
self,
|
1250 |
+
input_ids: Optional[torch.Tensor] = None,
|
1251 |
+
attention_mask: Optional[torch.Tensor] = None,
|
1252 |
+
encoder_hidden_states: Optional[torch.Tensor] = None,
|
1253 |
+
encoder_attention_mask: Optional[torch.Tensor] = None,
|
1254 |
+
head_mask: Optional[torch.Tensor] = None,
|
1255 |
+
cross_attn_head_mask: Optional[torch.Tensor] = None,
|
1256 |
+
past_key_values: Optional[List[torch.FloatTensor]] = None,
|
1257 |
+
inputs_embeds: Optional[torch.Tensor] = None,
|
1258 |
+
use_cache: Optional[bool] = None,
|
1259 |
+
output_attentions: Optional[bool] = None,
|
1260 |
+
output_hidden_states: Optional[bool] = None,
|
1261 |
+
return_dict: Optional[bool] = None,
|
1262 |
+
):
|
1263 |
+
r"""
|
1264 |
+
Args:
|
1265 |
+
input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
|
1266 |
+
Indices of input sequence tokens in the vocabulary. Padding will be ignored by default should you
|
1267 |
+
provide it.
|
1268 |
+
|
1269 |
+
Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
|
1270 |
+
[`PreTrainedTokenizer.__call__`] for details.
|
1271 |
+
|
1272 |
+
[What are input IDs?](../glossary#input-ids)
|
1273 |
+
attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
|
1274 |
+
Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:
|
1275 |
+
|
1276 |
+
- 1 for tokens that are **not masked**,
|
1277 |
+
- 0 for tokens that are **masked**.
|
1278 |
+
|
1279 |
+
[What are attention masks?](../glossary#attention-mask)
|
1280 |
+
encoder_hidden_states (`torch.FloatTensor` of shape `(batch_size, encoder_sequence_length, hidden_size)`, *optional*):
|
1281 |
+
Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention
|
1282 |
+
of the decoder.
|
1283 |
+
encoder_attention_mask (`torch.LongTensor` of shape `(batch_size, encoder_sequence_length)`, *optional*):
|
1284 |
+
Mask to avoid performing cross-attention on padding tokens indices of encoder input_ids. Mask values
|
1285 |
+
selected in `[0, 1]`:
|
1286 |
+
|
1287 |
+
- 1 for tokens that are **not masked**,
|
1288 |
+
- 0 for tokens that are **masked**.
|
1289 |
+
|
1290 |
+
[What are attention masks?](../glossary#attention-mask)
|
1291 |
+
head_mask (`torch.Tensor` of shape `(decoder_layers, decoder_attention_heads)`, *optional*):
|
1292 |
+
Mask to nullify selected heads of the attention modules. Mask values selected in `[0, 1]`:
|
1293 |
+
|
1294 |
+
- 1 indicates the head is **not masked**,
|
1295 |
+
- 0 indicates the head is **masked**.
|
1296 |
+
|
1297 |
+
cross_attn_head_mask (`torch.Tensor` of shape `(decoder_layers, decoder_attention_heads)`, *optional*):
|
1298 |
+
Mask to nullify selected heads of the cross-attention modules in the decoder to avoid performing
|
1299 |
+
cross-attention on hidden heads. Mask values selected in `[0, 1]`:
|
1300 |
+
|
1301 |
+
- 1 indicates the head is **not masked**,
|
1302 |
+
- 0 indicates the head is **masked**.
|
1303 |
+
|
1304 |
+
past_key_values (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
|
1305 |
+
Tuple of `tuple(torch.FloatTensor)` of length `config.n_layers`, with each tuple having 2 tensors of
|
1306 |
+
shape `(batch_size, num_heads, sequence_length, embed_size_per_head)`) and 2 additional tensors of
|
1307 |
+
shape `(batch_size, num_heads, encoder_sequence_length, embed_size_per_head)`.
|
1308 |
+
|
1309 |
+
Contains pre-computed hidden-states (key and values in the self-attention blocks and in the
|
1310 |
+
cross-attention blocks) that can be used (see `past_key_values` input) to speed up sequential decoding.
|
1311 |
+
|
1312 |
+
If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those
|
1313 |
+
that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of
|
1314 |
+
all `decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of
|
1315 |
+
shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing
|
1316 |
+
`input_ids` you can choose to directly pass an embedded representation. This is useful if you want more
|
1317 |
+
control over how to convert `input_ids` indices into associated vectors than the model's internal
|
1318 |
+
embedding lookup matrix.
|
1319 |
+
output_attentions (`bool`, *optional*):
|
1320 |
+
Whether or not to return the attentions tensors of all attention layers. See `attentions` under
|
1321 |
+
returned tensors for more detail.
|
1322 |
+
output_hidden_states (`bool`, *optional*):
|
1323 |
+
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors
|
1324 |
+
for more detail.
|
1325 |
+
return_dict (`bool`, *optional*):
|
1326 |
+
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
|
1327 |
+
"""
|
1328 |
+
output_attentions = (
|
1329 |
+
output_attentions
|
1330 |
+
if output_attentions is not None
|
1331 |
+
else self.config.output_attentions
|
1332 |
+
)
|
1333 |
+
output_hidden_states = (
|
1334 |
+
output_hidden_states
|
1335 |
+
if output_hidden_states is not None
|
1336 |
+
else self.config.output_hidden_states
|
1337 |
+
)
|
1338 |
+
use_cache = use_cache if use_cache is not None else self.config.use_cache
|
1339 |
+
return_dict = (
|
1340 |
+
return_dict if return_dict is not None else self.config.use_return_dict
|
1341 |
+
)
|
1342 |
+
|
1343 |
+
# retrieve input_ids and inputs_embeds
|
1344 |
+
if input_ids is not None and inputs_embeds is not None:
|
1345 |
+
raise ValueError(
|
1346 |
+
"You cannot specify both decoder_input_ids and decoder_inputs_embeds at the same time"
|
1347 |
+
)
|
1348 |
+
elif input_ids is not None:
|
1349 |
+
input_shape = input_ids.size()
|
1350 |
+
input_ids = input_ids.view(-1, input_shape[-1])
|
1351 |
+
elif inputs_embeds is not None:
|
1352 |
+
input_shape = inputs_embeds.size()[:-1]
|
1353 |
+
else:
|
1354 |
+
raise ValueError(
|
1355 |
+
"You have to specify either decoder_input_ids or decoder_inputs_embeds"
|
1356 |
+
)
|
1357 |
+
|
1358 |
+
# past_key_values_length
|
1359 |
+
past_key_values_length = (
|
1360 |
+
past_key_values[0][0].shape[2] if past_key_values is not None else 0
|
1361 |
+
)
|
1362 |
+
|
1363 |
+
if inputs_embeds is None:
|
1364 |
+
inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale
|
1365 |
+
|
1366 |
+
|
1367 |
+
if self._use_flash_attention_2:
|
1368 |
+
# 2d mask is passed through the layers
|
1369 |
+
attention_mask = attention_mask if (attention_mask is not None and 0 in attention_mask) else None
|
1370 |
+
elif self._use_sdpa and not output_attentions and cross_attn_head_mask is None:
|
1371 |
+
# output_attentions=True & cross_attn_head_mask can not be supported when using SDPA, and we fall back on
|
1372 |
+
# the manual implementation that requires a 4D causal mask in all cases.
|
1373 |
+
attention_mask = _prepare_4d_causal_attention_mask_for_sdpa(
|
1374 |
+
attention_mask,
|
1375 |
+
input_shape,
|
1376 |
+
inputs_embeds,
|
1377 |
+
past_key_values_length,
|
1378 |
+
)
|
1379 |
+
else:
|
1380 |
+
# 4d mask is passed through the layers
|
1381 |
+
attention_mask = _prepare_4d_causal_attention_mask(
|
1382 |
+
attention_mask, input_shape, inputs_embeds, past_key_values_length
|
1383 |
+
)
|
1384 |
+
|
1385 |
+
# expand encoder attention mask
|
1386 |
+
if encoder_hidden_states is not None and encoder_attention_mask is not None:
|
1387 |
+
if self._use_flash_attention_2:
|
1388 |
+
encoder_attention_mask = encoder_attention_mask if 0 in encoder_attention_mask else None
|
1389 |
+
            elif self._use_sdpa and cross_attn_head_mask is None and not output_attentions:
                # output_attentions=True & cross_attn_head_mask can not be supported when using SDPA, and we fall back on
                # the manual implementation that requires a 4D causal mask in all cases.
                # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
                encoder_attention_mask = _prepare_4d_attention_mask_for_sdpa(
                    encoder_attention_mask,
                    inputs_embeds.dtype,
                    tgt_len=input_shape[-1],
                )
            else:
                # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
                encoder_attention_mask = _prepare_4d_attention_mask(
                    encoder_attention_mask, inputs_embeds.dtype, tgt_len=input_shape[-1]
                )

        # embed positions
        positions = self.embed_positions(
            input_ids, inputs_embeds, past_key_values_length
        )
        positions = positions.to(inputs_embeds.device)

        hidden_states = inputs_embeds + positions
        if self.layernorm_embedding is not None:
            hidden_states = self.layernorm_embedding(hidden_states)

        hidden_states = F.dropout(hidden_states, p=self.dropout, training=self.training)

        if self.gradient_checkpointing and self.training:
            if use_cache:
                logger.warning_once(
                    "`use_cache=True` is incompatible with gradient checkpointing. Setting"
                    " `use_cache=False`..."
                )
                use_cache = False

        # decoder layers
        all_hidden_states = () if output_hidden_states else None
        all_self_attns = () if output_attentions else None
        all_cross_attentions = () if output_attentions else None
        next_decoder_cache = () if use_cache else None

        # check if head_mask/cross_attn_head_mask has a correct number of layers specified if desired
        for attn_mask, mask_name in zip(
            [head_mask, cross_attn_head_mask], ["head_mask", "cross_attn_head_mask"]
        ):
            if attn_mask is not None:
                if attn_mask.size()[0] != len(self.layers):
                    raise ValueError(
                        f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for"
                        f" {head_mask.size()[0]}."
                    )
        deepspeed_zero3_is_enabled = is_deepspeed_zero3_enabled()

        for idx, decoder_layer in enumerate(self.layers):
            if output_hidden_states:
                all_hidden_states += (hidden_states,)

            # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
            dropout_probability = torch.rand([])

            skip_the_layer = (
                True
                if self.training and (dropout_probability < self.layerdrop)
                else False
            )
            if not skip_the_layer or deepspeed_zero3_is_enabled:
                # under deepspeed zero3 all gpus must run in sync

                past_key_value = (
                    past_key_values[idx] if past_key_values is not None else None
                )

                if self.gradient_checkpointing and self.training:

                    def create_custom_forward(module):
                        def custom_forward(*inputs):
                            # None for past_key_value
                            return module(*inputs, output_attentions, use_cache)

                        return custom_forward

                    layer_outputs = torch.utils.checkpoint.checkpoint(
                        create_custom_forward(decoder_layer),
                        hidden_states,
                        attention_mask,
                        encoder_hidden_states,
                        encoder_attention_mask,
                        head_mask[idx] if head_mask is not None else None,
                        cross_attn_head_mask[idx]
                        if cross_attn_head_mask is not None
                        else None,
                        None,
                    )
                else:
                    layer_outputs = decoder_layer(
                        hidden_states,
                        attention_mask=attention_mask,
                        encoder_hidden_states=encoder_hidden_states,
                        encoder_attention_mask=encoder_attention_mask,
                        layer_head_mask=(
                            head_mask[idx] if head_mask is not None else None
                        ),
                        cross_attn_layer_head_mask=(
                            cross_attn_head_mask[idx]
                            if cross_attn_head_mask is not None
                            else None
                        ),
                        past_key_value=past_key_value,
                        output_attentions=output_attentions,
                        use_cache=use_cache,
                    )

                hidden_states = layer_outputs[0]

            if skip_the_layer:
                continue

            if use_cache:
                next_decoder_cache += (layer_outputs[3 if output_attentions else 1],)

            if output_attentions:
                all_self_attns += (layer_outputs[1],)
                all_cross_attentions += (layer_outputs[2],)

        if self.layer_norm is not None:
            hidden_states = self.layer_norm(hidden_states)

        # add hidden states from the last decoder layer
        if output_hidden_states:
            all_hidden_states += (hidden_states,)

        next_cache = next_decoder_cache if use_cache else None
        if not return_dict:
            return tuple(
                v
                for v in [
                    hidden_states,
                    next_cache,
                    all_hidden_states,
                    all_self_attns,
                    all_cross_attentions,
                ]
                if v is not None
            )
        return BaseModelOutputWithPastAndCrossAttentions(
            last_hidden_state=hidden_states,
            past_key_values=next_cache,
            hidden_states=all_hidden_states,
            attentions=all_self_attns,
            cross_attentions=all_cross_attentions,
        )


# Copied from transformers.models.m2m_100.modeling_m2m_100.M2M100Model->IndicTrans
class IndicTransModel(IndicTransPreTrainedModel):
    _tied_weights_keys = None

    def __init__(self, config: IndicTransConfig):
        super().__init__(config)

        self.encoder = IndicTransEncoder(config)
        self.decoder = IndicTransDecoder(config)

        # Initialize weights and apply final processing
        self.post_init()

    def get_encoder(self):
        return self.encoder

    def get_decoder(self):
        return self.decoder

    def forward(
        self,
        input_ids: Optional[torch.LongTensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        decoder_input_ids: Optional[torch.LongTensor] = None,
        decoder_attention_mask: Optional[torch.LongTensor] = None,
        head_mask: Optional[torch.Tensor] = None,
        decoder_head_mask: Optional[torch.Tensor] = None,
        cross_attn_head_mask: Optional[torch.Tensor] = None,
        encoder_outputs: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
        past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
        inputs_embeds: Optional[torch.FloatTensor] = None,
        decoder_inputs_embeds: Optional[torch.FloatTensor] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple[torch.Tensor], Seq2SeqModelOutput]:
        output_attentions = (
            output_attentions
            if output_attentions is not None
            else self.config.output_attentions
        )
        output_hidden_states = (
            output_hidden_states
            if output_hidden_states is not None
            else self.config.output_hidden_states
        )
        use_cache = use_cache if use_cache is not None else self.config.use_cache
        return_dict = (
            return_dict if return_dict is not None else self.config.use_return_dict
        )

        if encoder_outputs is None:
            encoder_outputs = self.encoder(
                input_ids=input_ids,
                attention_mask=attention_mask,
                head_mask=head_mask,
                inputs_embeds=inputs_embeds,
                output_attentions=output_attentions,
                output_hidden_states=output_hidden_states,
                return_dict=return_dict,
            )
        # If the user passed a tuple for encoder_outputs, we wrap it in a BaseModelOutput when return_dict=True
        elif return_dict and not isinstance(encoder_outputs, BaseModelOutput):
            encoder_outputs = BaseModelOutput(
                last_hidden_state=encoder_outputs[0],
                hidden_states=encoder_outputs[1] if len(encoder_outputs) > 1 else None,
                attentions=encoder_outputs[2] if len(encoder_outputs) > 2 else None,
            )

        # decoder outputs consists of (dec_features, past_key_value, dec_hidden, dec_attn)
        decoder_outputs = self.decoder(
            input_ids=decoder_input_ids,
            attention_mask=decoder_attention_mask,
            encoder_hidden_states=encoder_outputs[0],
            encoder_attention_mask=attention_mask,
            head_mask=decoder_head_mask,
            cross_attn_head_mask=cross_attn_head_mask,
            past_key_values=past_key_values,
            inputs_embeds=decoder_inputs_embeds,
            use_cache=use_cache,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        if not return_dict:
            return decoder_outputs + encoder_outputs

        return Seq2SeqModelOutput(
            last_hidden_state=decoder_outputs.last_hidden_state,
            past_key_values=decoder_outputs.past_key_values,
            decoder_hidden_states=decoder_outputs.hidden_states,
            decoder_attentions=decoder_outputs.attentions,
            cross_attentions=decoder_outputs.cross_attentions,
            encoder_last_hidden_state=encoder_outputs.last_hidden_state,
            encoder_hidden_states=encoder_outputs.hidden_states,
            encoder_attentions=encoder_outputs.attentions,
        )


# Copied from transformers.models.m2m_100.modeling_m2m_100.M2M100ForConditionalGeneration->IndicTrans
class IndicTransForConditionalGeneration(IndicTransPreTrainedModel):
    base_model_prefix = "model"
    _tied_weights_keys = None
    _label_smoothing = 0.0

    def __init__(self, config: IndicTransConfig):
        super().__init__(config)
        self.model = IndicTransModel(config)
        self.lm_head = nn.Linear(
            config.decoder_embed_dim, config.decoder_vocab_size, bias=False
        )

        if config.share_decoder_input_output_embed:
            self.lm_head.weight = self.model.decoder.embed_tokens.weight

        self.post_init()

    def tie_weights(self):
        pass

    def get_encoder(self):
        return self.model.get_encoder()

    def get_decoder(self):
        return self.model.get_decoder()

    def get_output_embeddings(self):
        return self.lm_head

    def set_output_embeddings(self, new_embeddings):
        self.lm_head = new_embeddings

    def set_label_smoothing(self, label_smoothing):
        self._label_smoothing = label_smoothing

    def forward(
        self,
        input_ids: Optional[torch.LongTensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        decoder_input_ids: Optional[torch.LongTensor] = None,
        decoder_attention_mask: Optional[torch.LongTensor] = None,
        head_mask: Optional[torch.Tensor] = None,
        decoder_head_mask: Optional[torch.Tensor] = None,
        cross_attn_head_mask: Optional[torch.Tensor] = None,
        encoder_outputs: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
        past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
        inputs_embeds: Optional[torch.FloatTensor] = None,
        decoder_inputs_embeds: Optional[torch.FloatTensor] = None,
        labels: Optional[torch.LongTensor] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple[torch.Tensor], Seq2SeqLMOutput]:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
            config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
            (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.

        Returns:
        """
        return_dict = (
            return_dict if return_dict is not None else self.config.use_return_dict
        )

        if labels is not None:
            if decoder_input_ids is None:
                decoder_input_ids = shift_tokens_right(
                    labels, self.config.pad_token_id, self.config.decoder_start_token_id
                )

        outputs = self.model(
            input_ids,
            attention_mask=attention_mask,
            decoder_input_ids=decoder_input_ids,
            encoder_outputs=encoder_outputs,
            decoder_attention_mask=decoder_attention_mask,
            head_mask=head_mask,
            decoder_head_mask=decoder_head_mask,
            cross_attn_head_mask=cross_attn_head_mask,
            past_key_values=past_key_values,
            inputs_embeds=inputs_embeds,
            decoder_inputs_embeds=decoder_inputs_embeds,
            use_cache=use_cache,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
        lm_logits = self.lm_head(outputs[0])

        masked_lm_loss = None
        if labels is not None:
            # move labels to the correct device to enable PP
            labels = labels.to(lm_logits.device)
            masked_lm_loss = F.cross_entropy(
                input=lm_logits.view(-1, self.config.decoder_vocab_size),
                target=labels.view(-1),
                ignore_index=-100,
                label_smoothing=self._label_smoothing,
            )

        if not return_dict:
            output = (lm_logits,) + outputs[1:]
            return (
                ((masked_lm_loss,) + output) if masked_lm_loss is not None else output
            )

        return Seq2SeqLMOutput(
            loss=masked_lm_loss,
            logits=lm_logits,
            past_key_values=outputs.past_key_values,
            decoder_hidden_states=outputs.decoder_hidden_states,
            decoder_attentions=outputs.decoder_attentions,
            cross_attentions=outputs.cross_attentions,
            encoder_last_hidden_state=outputs.encoder_last_hidden_state,
            encoder_hidden_states=outputs.encoder_hidden_states,
            encoder_attentions=outputs.encoder_attentions,
        )

    def prepare_inputs_for_generation(
        self,
        decoder_input_ids,
        past_key_values=None,
        attention_mask=None,
        head_mask=None,
        decoder_head_mask=None,
        cross_attn_head_mask=None,
        use_cache=None,
        encoder_outputs=None,
        **kwargs,
    ):
        # cut decoder_input_ids if past is used
        if past_key_values is not None:
            decoder_input_ids = decoder_input_ids[:, -1:]

        return {
            "input_ids": None,  # encoder_outputs is defined. input_ids not needed
            "encoder_outputs": encoder_outputs,
            "past_key_values": past_key_values,
            "decoder_input_ids": decoder_input_ids,
            "attention_mask": attention_mask,
            "head_mask": head_mask,
            "decoder_head_mask": decoder_head_mask,
            "cross_attn_head_mask": cross_attn_head_mask,
            "use_cache": use_cache,  # change this to avoid caching (presumably for debugging)
        }

    @staticmethod
    def _reorder_cache(past_key_values, beam_idx):
        reordered_past = ()
        for layer_past in past_key_values:
            reordered_past += (
                tuple(
                    past_state.index_select(0, beam_idx) for past_state in layer_past
                ),
            )
        return reordered_past
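Usage sketch (illustrative, not part of the uploaded files): the classes above follow the standard encoder-decoder generation API, so the model can be driven through `generate()` once it is loaded with `trust_remote_code=True`. This assumes the repository's config.json registers IndicTransForConditionalGeneration under auto_map, mirroring what tokenizer_config.json below does for the tokenizer; the repo id, example sentence, and decoding settings are placeholders. `set_label_smoothing()` only matters when fine-tuning with labels.

import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

repo_id = "<this-repo>"  # placeholder for the Hub id of this repository

tokenizer = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)
model = AutoModelForSeq2SeqLM.from_pretrained(repo_id, trust_remote_code=True)

# IndicTrans2-style input: source and target language tags are prepended to the sentence,
# matching the SPECIAL_TAGS handling in tokenization_indictrans.py below.
batch = tokenizer(
    ["eng_Latn hin_Deva This is a test sentence."],
    padding="longest",
    truncation=True,
    max_length=256,
    return_tensors="pt",
)

with torch.inference_mode():
    generated = model.generate(**batch, num_beams=5, max_length=256, use_cache=True)

# Decoding must go through the target-side vocabulary and SentencePiece model.
# as_target_tokenizer() is deprecated in recent transformers releases; the private
# _switch_to_target_mode() performs the same switch if it has been removed.
with tokenizer.as_target_tokenizer():
    print(tokenizer.batch_decode(generated, skip_special_tokens=True))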
special_tokens_map.json
ADDED
@@ -0,0 +1,30 @@
{
  "bos_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "<pad>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}
tokenization_indictrans.py
ADDED
@@ -0,0 +1,261 @@
import os
import json

from typing import Dict, List, Optional, Union, Tuple

from transformers.utils import logging
from sentencepiece import SentencePieceProcessor
from transformers.tokenization_utils import PreTrainedTokenizer


logger = logging.get_logger(__name__)

SPIECE_UNDERLINE = "▁"

SPECIAL_TAGS = {
    "_bt_",
    "_ft_",
    "asm_Beng",
    "awa_Deva",
    "ben_Beng",
    "bho_Deva",
    "brx_Deva",
    "doi_Deva",
    "eng_Latn",
    "gom_Deva",
    "gon_Deva",
    "guj_Gujr",
    "hin_Deva",
    "hne_Deva",
    "kan_Knda",
    "kas_Arab",
    "kas_Deva",
    "kha_Latn",
    "lus_Latn",
    "mag_Deva",
    "mai_Deva",
    "mal_Mlym",
    "mar_Deva",
    "mni_Beng",
    "mni_Mtei",
    "npi_Deva",
    "ory_Orya",
    "pan_Guru",
    "san_Deva",
    "sat_Olck",
    "snd_Arab",
    "snd_Deva",
    "tam_Taml",
    "tel_Telu",
    "urd_Arab",
    "unr_Deva",
}

VOCAB_FILES_NAMES = {
    "src_vocab_fp": "dict.SRC.json",
    "tgt_vocab_fp": "dict.TGT.json",
    "src_spm_fp": "model.SRC",
    "tgt_spm_fp": "model.TGT",
}


class IndicTransTokenizer(PreTrainedTokenizer):
    _added_tokens_encoder = {}
    _added_tokens_decoder = {}

    vocab_files_names = VOCAB_FILES_NAMES
    model_input_names = ["input_ids", "attention_mask"]

    def __init__(
        self,
        src_vocab_fp=None,
        tgt_vocab_fp=None,
        src_spm_fp=None,
        tgt_spm_fp=None,
        unk_token="<unk>",
        bos_token="<s>",
        eos_token="</s>",
        pad_token="<pad>",
        do_lower_case=False,
        **kwargs,
    ):
        self.src = True

        self.src_vocab_fp = src_vocab_fp
        self.tgt_vocab_fp = tgt_vocab_fp
        self.src_spm_fp = src_spm_fp
        self.tgt_spm_fp = tgt_spm_fp

        self.unk_token = unk_token
        self.pad_token = pad_token
        self.eos_token = eos_token
        self.bos_token = bos_token

        self.encoder = self._load_json(self.src_vocab_fp)
        if self.unk_token not in self.encoder:
            raise KeyError("<unk> token must be in vocab")
        assert self.pad_token in self.encoder
        self.encoder_rev = {v: k for k, v in self.encoder.items()}

        self.decoder = self._load_json(self.tgt_vocab_fp)
        if self.unk_token not in self.decoder:
            raise KeyError("<unk> token must be in vocab")
        assert self.pad_token in self.decoder
        self.decoder_rev = {v: k for k, v in self.decoder.items()}

        # load SentencePiece model for pre-processing
        self.src_spm = self._load_spm(self.src_spm_fp)
        self.tgt_spm = self._load_spm(self.tgt_spm_fp)

        self.current_spm = self.src_spm
        self.current_encoder = self.encoder
        self.current_encoder_rev = self.encoder_rev

        self.unk_token_id = self.encoder[self.unk_token]
        self.pad_token_id = self.encoder[self.pad_token]
        self.eos_token_id = self.encoder[self.eos_token]
        self.bos_token_id = self.encoder[self.bos_token]

        super().__init__(
            src_vocab_file=self.src_vocab_fp,
            tgt_vocab_file=self.tgt_vocab_fp,
            do_lower_case=do_lower_case,
            unk_token=unk_token,
            bos_token=bos_token,
            eos_token=eos_token,
            pad_token=pad_token,
            **kwargs,
        )

    def add_new_special_tags(self, new_tags: List[str]):
        SPECIAL_TAGS.update(new_tags)

    def _switch_to_input_mode(self):
        self.src = True
        self.padding_side = "left"
        self.current_spm = self.src_spm
        self.current_encoder = self.encoder
        self.current_encoder_rev = self.encoder_rev

    def _switch_to_target_mode(self):
        self.src = False
        self.padding_side = "right"
        self.current_spm = self.tgt_spm
        self.current_encoder = self.decoder
        self.current_encoder_rev = self.decoder_rev

    def _load_spm(self, path: str) -> SentencePieceProcessor:
        return SentencePieceProcessor(model_file=path)

    def _save_json(self, data, path: str) -> None:
        with open(path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2)

    def _load_json(self, path: str) -> Union[Dict, List]:
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)

    def _split_tags(self, tokens: List[str]) -> Tuple[List[str], List[str]]:
        tags = [token for token in tokens if token in SPECIAL_TAGS]
        tokens = [token for token in tokens if token not in SPECIAL_TAGS]
        return tags, tokens

    def _split_pads(self, tokens: List[str]) -> Tuple[List[str], List[str]]:
        pads = [token for token in tokens if token == self.pad_token]
        tokens = [token for token in tokens if token != self.pad_token]
        return pads, tokens

    @property
    def src_vocab_size(self) -> int:
        return len(self.encoder)

    @property
    def tgt_vocab_size(self) -> int:
        return len(self.decoder)

    def get_src_vocab(self) -> Dict[str, int]:
        return dict(self.encoder, **self.added_tokens_encoder)

    def get_tgt_vocab(self) -> Dict[str, int]:
        return dict(self.decoder, **self.added_tokens_decoder)

    # hack override
    def get_vocab(self) -> Dict[str, int]:
        return self.get_src_vocab()

    # hack override
    @property
    def vocab_size(self) -> int:
        return self.src_vocab_size

    def _convert_token_to_id(self, token: str) -> int:
        """Converts a token (str) into an index (integer) using the source/target vocabulary map."""
        return self.current_encoder.get(token, self.current_encoder[self.unk_token])

    def _convert_id_to_token(self, index: int) -> str:
        """Converts an index (integer) into a token (str) using the source/target vocabulary map."""
        return self.current_encoder_rev.get(index, self.unk_token)

    def convert_tokens_to_string(self, tokens: List[str]) -> str:
        """Uses the SentencePiece model for detokenization."""
        pads, tokens = self._split_pads(tokens)

        if self.src:
            tags, non_tags = self._split_tags(tokens)

            return (
                " ".join(pads)
                + " "
                + " ".join(tags)
                + " "
                + "".join(non_tags).replace(SPIECE_UNDERLINE, " ").strip()
            )

        return (
            "".join(tokens).replace(SPIECE_UNDERLINE, " ").strip()
            + " "
            + " ".join(pads)
        )

    def _tokenize(self, text) -> List[str]:
        if self.src:
            tokens = text.split(" ")
            tags, non_tags = self._split_tags(tokens)
            text = " ".join(non_tags)
            tokens = self.current_spm.EncodeAsPieces(text)
            return tags + tokens
        else:
            return self.current_spm.EncodeAsPieces(text)

    def build_inputs_with_special_tokens(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
        if token_ids_1 is None:
            return token_ids_0 + [self.eos_token_id]
        # We don't expect to process pairs, but leave the pair logic for API consistency
        return token_ids_0 + [self.eos_token_id] + token_ids_1 + [self.eos_token_id]

    def save_vocabulary(
        self, save_directory: str, filename_prefix: Optional[str] = None
    ) -> Tuple[str]:
        if not os.path.isdir(save_directory):
            logger.error(f"Vocabulary path ({save_directory}) should be a directory")
            return

        src_spm_fp = os.path.join(save_directory, "model.SRC")
        tgt_spm_fp = os.path.join(save_directory, "model.TGT")
        src_vocab_fp = os.path.join(save_directory, "dict.SRC.json")
        tgt_vocab_fp = os.path.join(save_directory, "dict.TGT.json")

        self._save_json(self.encoder, src_vocab_fp)
        self._save_json(self.decoder, tgt_vocab_fp)

        with open(src_spm_fp, "wb") as f:
            f.write(self.src_spm.serialized_model_proto())

        with open(tgt_spm_fp, "wb") as f:
            f.write(self.tgt_spm.serialized_model_proto())

        return src_vocab_fp, tgt_vocab_fp, src_spm_fp, tgt_spm_fp
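A minimal sketch of how the source/target switching above is exercised through the standard tokenizer API: encoding `text` goes through `_switch_to_input_mode` (dict.SRC.json, model.SRC, left padding) while `text_target` goes through `_switch_to_target_mode` (dict.TGT.json, model.TGT, right padding), and `_tokenize` strips the language tags before SentencePiece and re-prepends them. The sentences below are illustrative, and `text_target` support assumes a reasonably recent transformers release.

# Assumes `tokenizer` was loaded from this repository with trust_remote_code=True.
batch = tokenizer(
    "eng_Latn hin_Deva This is a test sentence.",  # source: lang tags + sentence
    text_target="यह एक परीक्षण वाक्य है।",              # target side, returned as labels
    return_tensors="pt",
)

# input_ids come from the source vocabulary, labels from the target vocabulary;
# both end with the end-of-sentence id appended by build_inputs_with_special_tokens.
print(batch["input_ids"])
print(batch["labels"])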
tokenizer_config.json
ADDED
@@ -0,0 +1,50 @@
{
  "added_tokens_decoder": {
    "0": {
      "content": "<s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "<pad>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "</s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "3": {
      "content": "<unk>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "auto_map": {
    "AutoTokenizer": [
      "tokenization_indictrans.IndicTransTokenizer",
      null
    ]
  },
  "bos_token": "<s>",
  "clean_up_tokenization_spaces": true,
  "do_lower_case": false,
  "eos_token": "</s>",
  "model_max_length": 256,
  "pad_token": "<pad>",
  "tokenizer_class": "IndicTransTokenizer",
  "unk_token": "<unk>"
}
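The four special tokens declared here match special_tokens_map.json and, per added_tokens_decoder, occupy ids 0-3, while model_max_length caps sequences at 256 tokens. A quick, illustrative sanity check (assuming `tokenizer` was loaded from this repository with trust_remote_code=True):

for token in ["<s>", "<pad>", "</s>", "<unk>"]:
    print(token, tokenizer.convert_tokens_to_ids(token))
# Expected per added_tokens_decoder above: <s> -> 0, <pad> -> 1, </s> -> 2, <unk> -> 3
print(tokenizer.model_max_length)  # 256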