package vn.corenlp.tokenizer;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import org.apache.log4j.helpers.AbsoluteTimeDateFormat;

/* JADX INFO: Access modifiers changed from: package-private */
/* compiled from: Tokenizer.java */
/* loaded from: input_file:vn/corenlp/tokenizer/Regex.class */
/**
 * Holder for the regular-expression fragments used by the tokenizer, together with an
 * ordered registry that maps each fragment to a symbolic name.
 *
 * <p>The registry is populated once, during class initialization, so both
 * {@link #getRegexList()} and {@link #getRegexIndex(String)} can be called in any order.
 */
public class Regex {
    /** Two or more consecutive dots. */
    public static final String ELLIPSIS = "\\.{2,}";
    /** E-mail address: local part, {@code @}, then one or more dot-separated labels. */
    public static final String EMAIL = "([\\w\\d_\\.-]+)@(([\\d\\w-]+)\\.)*([\\d\\w-]+)";
    /** Day, month and four-digit year separated by {@code /}, {@code -} or {@code .}. */
    public static final String FULL_DATE = "(0?[1-9]|[12][0-9]|3[01])(\\/|-|\\.)(1[0-2]|(0?[1-9]))((\\/|-|\\.)\\d{4})";
    /** Month and four-digit year separated by {@code /}. */
    public static final String MONTH = "(1[0-2]|(0?[1-9]))(\\/)\\d{4}";
    /** Day and month separated by {@code /}. */
    public static final String DATE = "(0?[1-9]|[12][0-9]|3[01])(\\/)(1[0-2]|(0?[1-9]))";
    /**
     * Either {@code dd:dd:dd}, or an hour (0-23) followed by {@code :} or {@code h}, minutes,
     * and an optional minute marker.
     */
    public static final String TIME = "(\\d\\d:\\d\\d:\\d\\d)|((0?\\d|1\\d|2[0-3])(:|h)(0?\\d|[1-5]\\d)(’|'|p|ph)?)";
    /** A number with a Unicode currency symbol ({@code \p{Sc}}) directly before or after it. */
    public static final String MONEY = "\\p{Sc}\\d+([\\.,]\\d+)*|\\d+([\\.,]\\d+)*\\p{Sc}";
    /** Optional country prefix followed by three digit groups with optional separators. */
    public static final String PHONE_NUMBER = "(\\(?\\+\\d{1,2}\\)?[\\s\\.-]?)?\\d{2,}[\\s\\.-]?\\d{3,}[\\s\\.-]?\\d{3,}";
    /** URL, with or without an explicit scheme / {@code www.} prefix. */
    public static final String URL = "(((https?|ftp):\\/\\/|www\\.)[^\\s/$.?#].[^\\s]*)|(https?:\\/\\/)?(www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{2,256}\\.[a-z]{2,6}\\b([-a-zA-Z0-9@:%_\\+.~#?&//=]*)";
    /** Optionally signed number with {@code .}/{@code ,} groups, optional {@code %} and currency symbol. */
    public static final String NUMBER = "[-+]?\\d+([\\.,]\\d+)*%?\\p{Sc}?";
    /**
     * Alternation of single punctuation marks.
     *
     * <p>NOTE(review): several alternatives (e.g. the ones starting with {@code â} or {@code Â})
     * look like UTF-8 text that was decoded with a single-byte charset. They are kept
     * byte-for-byte because changing them would change what the tokenizer matches; confirm
     * against the upstream source before correcting.
     */
    public static final String PUNCTUATION = ",|\\.|:|\\?|!|;|-|_|\"|'|“|”|\\||\\(|\\)|\\[|\\]|\\{|\\}|âŸ¨|âŸ©|Â«|Â»|\\\\|\\/|\\â€˜|\\â€™|\\â€œ|\\â€�|â€¦|…|‘|’|·";
    /**
     * Alternation of special (non-punctuation) symbols.
     *
     * <p>NOTE(review): the alternative starting with {@code â} looks mis-encoded in the same
     * way as in {@link #PUNCTUATION}; kept unchanged pending confirmation.
     */
    public static final String SPECIAL_CHAR = "\\~|\\@|\\#|\\^|\\&|\\*|\\+|\\-|\\â€“|<|>|\\|";
    /** End-of-sentence punctuation: one or more dots, {@code ?}, {@code !} or an ellipsis character. */
    public static final String EOS_PUNCTUATION = "(\\.+|\\?|!|…)";
    /** One {@link #NUMBER} followed by any number of {@code + - * /} operator/number pairs. */
    public static final String NUMBERS_EXPRESSION = "[-+]?\\d+([\\.,]\\d+)*%?\\p{Sc}?([\\+\\-\\*\\/][-+]?\\d+([\\.,]\\d+)*%?\\p{Sc}?)*";
    /** Letter runs joined by {@code .} or {@code -}, or a letter run followed by {@code -} and digits. */
    public static final String SHORT_NAME = "([\\p{L}]+([\\.\\-][\\p{L}]+)+)|([\\p{L}]+-\\d+)";
    /** Two or more letter runs joined by hyphens. */
    public static final String WORD_WITH_HYPHEN = "\\p{L}+-\\p{L}+(-\\p{L}+)*";
    /** Two runs of ASCII capitals separated by a dot. */
    public static final String ALLCAP = "[A-Z]+\\.[A-Z]+";

    /** Registered patterns, in registration order. */
    private static final List<String> regexes = new ArrayList<>();
    /** Symbolic names, parallel to {@link #regexes} (same index = same entry). */
    private static final List<String> regexIndex = new ArrayList<>();

    static {
        // Registration order defines the indices reported by getRegexIndex and the order of
        // getRegexList; it is kept exactly as in the original lazy initializer.
        // Note that EOS_PUNCTUATION is intentionally not registered (it never was).
        register("ELLIPSIS", ELLIPSIS);
        register("EMAIL", EMAIL);
        register("URL", URL);
        register("FULL_DATE", FULL_DATE);
        register("MONTH", MONTH);
        // The decompiled source used log4j's AbsoluteTimeDateFormat.DATE_AND_TIME_DATE_FORMAT
        // here, which is just the string "DATE"; the literal removes the accidental coupling.
        register("DATE", DATE);
        register("TIME", TIME);
        register("MONEY", MONEY);
        register("PHONE_NUMBER", PHONE_NUMBER);
        register("SHORT_NAME", SHORT_NAME);
        register("NUMBERS_EXPRESSION", NUMBERS_EXPRESSION);
        register("NUMBER", NUMBER);
        register("WORD_WITH_HYPHEN", WORD_WITH_HYPHEN);
        register("PUNCTUATION", PUNCTUATION);
        register("SPECIAL_CHAR", SPECIAL_CHAR);
        register("ALLCAP", ALLCAP);
    }

    Regex() {
    }

    /** Appends one name/pattern pair to the two parallel registry lists. */
    private static void register(String name, String pattern) {
        regexes.add(pattern);
        regexIndex.add(name);
    }

    /**
     * Returns the registered patterns in registration order.
     *
     * @return the internal list of pattern strings (the same instance on every call; it is not
     *     copied, so callers should treat it as read-only)
     */
    public static List<String> getRegexList() {
        return regexes;
    }

    /**
     * Looks up the position of a pattern by its symbolic name, ignoring case.
     *
     * @param str symbolic name such as {@code "EMAIL"} or {@code "url"}; may be {@code null}
     * @return the index of the pattern in {@link #getRegexList()}, or {@code -1} if the name is
     *     {@code null} or not registered
     */
    public static int getRegexIndex(String str) {
        if (str == null) {
            return -1;
        }
        // Locale.ROOT keeps the mapping stable under locales with special casing rules
        // (e.g. Turkish dotted/dotless i).
        return regexIndex.indexOf(str.toUpperCase(Locale.ROOT));
    }

    /**
     * Ad-hoc demo entry point: tokenizes a fixed sample string and prints each token followed
     * by {@code StringConst.SPACE}.
     *
     * @param strArr ignored
     * @throws IOException declared by the original signature; presumably propagated from
     *     {@code Tokenizer.tokenize} (not visible here)
     */
    public static void main(String[] strArr) throws IOException {
        Iterator<String> tokens = Tokenizer.tokenize("93% 9-10 anh-yeu-em").iterator();
        while (tokens.hasNext()) {
            System.out.print(tokens.next() + StringConst.SPACE);
        }
    }
}
