package vn.corenlp.tokenizer;

import edu.emory.mathcs.nlp.common.treebank.POSTagEn;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: input_file:vn/corenlp/tokenizer/Tokenizer.class */
/**
 * Splits raw text into tokens and groups token sequences into sentences.
 *
 * <p>Tokenization first splits the input on whitespace and then repeatedly peels punctuation off
 * each chunk, consulting the abbreviation/exception collections in {@code StringUtils} and the
 * expressions returned by {@code Regex.getRegexList()}.
 */
public class Tokenizer {
    /** Compiled patterns keyed by their source expression, so every expression is compiled once. */
    private static final Map<String, Pattern> PATTERN_CACHE = new ConcurrentHashMap<>();

    /**
     * Tokenizes a piece of text.
     *
     * @param str the text to tokenize; may be {@code null}
     * @return the tokens in order of appearance; an empty list when {@code str} is {@code null}
     *     or contains only whitespace
     * @throws IOException declared for compatibility with existing callers
     */
    public static List<String> tokenize(String str) throws IOException {
        List<String> tokens = new ArrayList<>();
        if (str == null) {
            return tokens;
        }
        String trimmed = str.trim();
        if (trimmed.isEmpty()) {
            return tokens;
        }
        for (String chunk : trimmed.split("\\s+")) {
            tokenizeChunk(chunk, tokens);
        }
        return tokens;
    }

    /** Appends the tokens of one whitespace-free chunk to {@code out}. */
    private static void tokenizeChunk(String chunk, List<String> out) throws IOException {
        if (chunk.length() == 1 || !StringUtils.hasPunctuation(chunk)) {
            out.add(chunk);
            return;
        }
        if (chunk.endsWith(",")) {
            out.addAll(tokenize(chunk.substring(0, chunk.length() - 1)));
            out.add(",");
            return;
        }
        if (StringUtils.VN_abbreviation.contains(chunk)) {
            out.add(chunk);
            return;
        }
        // The chunk has at least two characters here, so length() - 2 is a valid index.
        if (chunk.endsWith(".") && Character.isAlphabetic(chunk.charAt(chunk.length() - 2))) {
            boolean singleUpperCaseInitial =
                    chunk.length() == 2 && Character.isUpperCase(chunk.charAt(0));
            if (singleUpperCaseInitial || compiled(Regex.SHORT_NAME).matcher(chunk).find()) {
                // The period belongs to the token (e.g. an initial such as "A.").
                out.add(chunk);
            } else {
                out.addAll(tokenize(chunk.substring(0, chunk.length() - 1)));
                out.add(".");
            }
            return;
        }
        if (StringUtils.VN_exception.contains(chunk)) {
            out.add(chunk);
            return;
        }
        if (splitAroundFirstContained(StringUtils.VN_abbreviation, chunk, out)) {
            return;
        }
        if (splitAroundFirstContained(StringUtils.VN_exception, chunk, out)) {
            return;
        }
        tokenizeByRegex(chunk, out);
    }

    /**
     * Splits {@code chunk} around the first entry (in iteration order) that occurs inside it.
     *
     * @return {@code true} if an entry was found and the resulting tokens were appended
     */
    private static boolean splitAroundFirstContained(
            Iterable<String> entries, String chunk, List<String> out) throws IOException {
        for (String entry : entries) {
            int start = chunk.indexOf(entry);
            if (start >= 0) {
                recursive(out, chunk, start, start + entry.length());
                return true;
            }
        }
        return false;
    }

    /**
     * Tokenizes {@code chunk} with the expressions from {@code Regex.getRegexList()}: a chunk
     * fully matched by any expression is kept whole; otherwise it is split around the first
     * partial match. A chunk that no expression accepts is kept whole.
     */
    private static void tokenizeByRegex(String chunk, List<String> out) throws IOException {
        List<String> regexes = Regex.getRegexList();
        for (String regex : regexes) {
            if (compiled(regex).matcher(chunk).matches()) {
                out.add(chunk);
                return;
            }
        }
        for (int i = 0; i < regexes.size(); i++) {
            Matcher matcher = compiled(regexes.get(i)).matcher(chunk);
            if (!matcher.find()) {
                continue;
            }
            if (i == Regex.getRegexIndex("url")) {
                if (!isPlausibleUrl(chunk)) {
                    // Not a real URL: keep trying the remaining expressions instead of
                    // consuming the chunk without emitting anything.
                    continue;
                }
                recursive(out, chunk, matcher.start(), matcher.end());
            } else if (i == Regex.getRegexIndex("month")) {
                if (hasLetterBefore(chunk, matcher.start())) {
                    recursive(out, chunk, matcher.start(), matcher.end());
                } else {
                    out.add(chunk);
                }
            } else {
                recursive(out, chunk, matcher.start(), matcher.end());
            }
            return;
        }
        out.add(chunk);
    }

    /**
     * Returns {@code false} when any dot-separated part of {@code chunk} is a single upper-case
     * letter or contains a character with code 128 or above.
     */
    private static boolean isPlausibleUrl(String chunk) {
        for (String part : chunk.split(Pattern.quote("."))) {
            if (part.length() == 1 && Character.isUpperCase(part.charAt(0))) {
                return false;
            }
            for (int k = 0; k < part.length(); k++) {
                if (part.charAt(k) >= 128) {
                    return false;
                }
            }
        }
        return true;
    }

    /** Returns whether any character of {@code text} before index {@code end} is a letter. */
    private static boolean hasLetterBefore(String text, int end) {
        for (int k = 0; k < end; k++) {
            if (Character.isLetter(text.charAt(k))) {
                return true;
            }
        }
        return false;
    }

    /** Returns the compiled form of {@code regex}, compiling it on first use only. */
    private static Pattern compiled(String regex) {
        Pattern pattern = PATTERN_CACHE.get(regex);
        if (pattern == null) {
            pattern = Pattern.compile(regex);
            PATTERN_CACHE.put(regex, pattern);
        }
        return pattern;
    }

    /**
     * Tokenizes the three pieces of {@code str} delimited by the span {@code [i, i2)} (the text
     * before it, the span itself and the text after it) and appends the results to {@code list}.
     *
     * @return the same {@code list} instance that was passed in
     */
    private static List<String> recursive(List<String> list, String str, int i, int i2) throws IOException {
        int length = str.length();
        // If one of the pieces would be the whole string again, tokenizing it would recurse
        // forever; keep the string as a single token instead.
        if (i2 <= 0 || i >= length || (i <= 0 && i2 >= length)) {
            list.add(str);
            return list;
        }
        if (i > 0) {
            list.addAll(tokenize(str.substring(0, i)));
        }
        list.addAll(tokenize(str.substring(i, i2)));
        if (i2 < length) {
            list.addAll(tokenize(str.substring(i2)));
        }
        return list;
    }

    /**
     * Groups a flat token sequence into sentences, each rendered with {@link #joinSentence(List)}.
     *
     * @param list the tokens of a text, in order; may be {@code null}
     * @return one string per detected sentence; empty when {@code list} is {@code null} or empty
     */
    public static List<String> joinSentences(List<String> list) {
        List<String> sentences = new ArrayList<>();
        if (list == null) {
            return sentences;
        }
        List<String> current = new ArrayList<>();
        int lastIndex = list.size() - 1;
        for (int i = 0; i <= lastIndex; i++) {
            String token = list.get(i);
            current.add(token);
            if (i == lastIndex) {
                // Whatever is left forms the final sentence.
                sentences.add(joinSentence(current));
                break;
            }
            if (isColonBoundary(list, i) || isSentenceEnd(token, list.get(i + 1), current)) {
                sentences.add(joinSentence(current));
                current.clear();
            }
        }
        return sentences;
    }

    /**
     * Returns whether the token at {@code index} is a ":" that closes the current sentence: at
     * least two tokens follow it and either the next token starts with a digit and is followed
     * by ".", or the token after next is ",".
     */
    private static boolean isColonBoundary(List<String> tokens, int index) {
        if (index >= tokens.size() - 2 || !":".equals(tokens.get(index))) {
            return false;
        }
        String next = tokens.get(index + 1);
        String afterNext = tokens.get(index + 2);
        boolean numberedItem = next != null
                && !next.isEmpty()
                && Character.isDigit(next.charAt(0))
                && ".".equals(afterNext);
        return numberedItem || ",".equals(afterNext);
    }

    /**
     * Returns whether the sentence collected in {@code current} ends at {@code token}.
     *
     * @param token the token just appended to {@code current}
     * @param next the token that follows {@code token} in the input
     * @param current the tokens of the sentence so far, ending with {@code token}
     */
    private static boolean isSentenceEnd(String token, String next, List<String> current) {
        if (token == null || !compiled(Regex.EOS_PUNCTUATION).matcher(token).matches()) {
            return false;
        }
        if (next == null || next.isEmpty()) {
            return false;
        }
        // Do not cut in front of a quote mark that closes a quotation opened in this sentence.
        if (isQuoteMark(next) && hasUnbalancedQuotes(current)) {
            return false;
        }
        char nextFirst = next.charAt(0);
        if (StringUtils.isBrace(next)
                || Character.isLowerCase(nextFirst)
                || next.equals(",")
                || Character.isDigit(nextFirst)) {
            return false;
        }
        // A two-token sentence "<marker> ." whose marker starts with a digit or a lower-case
        // letter, or is a single upper-case letter, is not closed here.
        if (current.size() == 2 && token.equals(".")) {
            String marker = current.get(0);
            if (marker != null && !marker.isEmpty()) {
                char markerFirst = marker.charAt(0);
                if (Character.isDigit(markerFirst)
                        || Character.isLowerCase(markerFirst)
                        || (Character.isUpperCase(markerFirst) && marker.length() == 1)) {
                    return false;
                }
            }
        }
        return true;
    }

    /**
     * Returns whether {@code token} is a double quote or equals {@code POSTagEn.POS_RQ}.
     * NOTE(review): POS_RQ is presumably the two-apostrophe closing-quote mark; confirm.
     */
    private static boolean isQuoteMark(String token) {
        return token != null && (token.equals("\"") || token.equals(POSTagEn.POS_RQ));
    }

    /** Returns whether {@code tokens} contains an odd number of quote marks. */
    private static boolean hasUnbalancedQuotes(List<String> tokens) {
        int quotes = 0;
        for (String token : tokens) {
            if (isQuoteMark(token)) {
                quotes++;
            }
        }
        return quotes % 2 == 1;
    }

    /**
     * Joins tokens into one string separated by {@code StringConst.SPACE}, skipping tokens that
     * are {@code null}, empty or equal to the separator, and trims the result.
     *
     * @param list the tokens of one sentence; may be {@code null}
     * @return the joined sentence; an empty string when {@code list} is {@code null} or empty
     */
    public static String joinSentence(List<String> list) {
        if (list == null) {
            return "";
        }
        StringBuilder sentence = new StringBuilder();
        int size = list.size();
        for (int i = 0; i < size; i++) {
            String token = list.get(i);
            // The null test must come first; otherwise isEmpty() throws on null tokens.
            if (token == null || token.isEmpty() || token.equals(StringConst.SPACE)) {
                continue;
            }
            sentence.append(token);
            if (i < size - 1) {
                sentence.append(StringConst.SPACE);
            }
        }
        return sentence.toString().trim();
    }
}
