package lemming.lemma.ranker;

import cc.mallet.optimize.LimitedMemoryBFGS;
import cc.mallet.optimize.OptimizationException;
import edu.emory.mathcs.nlp.common.constant.StringConst;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Random;
import java.util.logging.Level;
import java.util.logging.Logger;
import lemming.lemma.LemmaCandidateGenerator;
import lemming.lemma.LemmaCandidateGeneratorTrainer;
import lemming.lemma.LemmaInstance;
import lemming.lemma.LemmaOptions;
import lemming.lemma.Lemmatizer;
import lemming.lemma.LemmatizerGenerator;
import lemming.lemma.LemmatizerGeneratorTrainer;
import lemming.lemma.SimpleLemmatizerTrainer;
import lemming.lemma.edit.EditTreeGeneratorTrainer;
import lemming.lemma.toutanova.EditTreeAligner;
import lemming.lemma.toutanova.EditTreeAlignerTrainer;
import marmot.util.Sys;
import net.arnx.jsonic.JSONException;
import org.apache.commons.math3.optimization.direct.CMAESOptimizer;

/* loaded from: input_file:lemming/lemma/ranker/RankerTrainer.class */
public class RankerTrainer implements LemmatizerGeneratorTrainer {
    // Options used for training; starts as a default-constructed option set and can be
    // replaced through setOptions.
    private RankerTrainerOptions options_ = new RankerTrainerOptions();
    // NOTE(review): never referenced by name in this file. runSgd uses the literal 3 as its
    // duplication cap, which is presumably this constant inlined by the compiler — confirm.
    private static final int MAX_NUM_DUPLICATES_ = 3;
    // Assertion switch emitted by the decompiler: assigned in the static initializer of this
    // class and read by the hand-expanded assertion in runPerceptron.
    static final /* synthetic */ boolean $assertionsDisabled;

    /* loaded from: input_file:lemming/lemma/ranker/RankerTrainer$RankerTrainerOptions.class */
    public static class RankerTrainerOptions extends LemmaOptions {
        private static final long serialVersionUID = 1;
        public static final String GENERATOR_TRAINERS = "generator-trainers";
        public static final String USE_PERCEPTRON = "use-perceptron";
        public static final String QUADRATIC_PENALTY = "quadratic-penalty";
        public static final String UNIGRAM_FILE = "unigram-file";
        public static final String USE_SHAPE_LEXICON = "use-shape-lexicon";
        public static final String ASPELL_LANG = "aspell-lang";
        public static final String ASPELL_PATH = "aspell-path";
        public static final String USE_CORE_FEATURES = "use-core-features";
        public static final String USE_ALIGNMENT_FEATURES = "use-alignment-features";
        public static final String IGNORE_FEATURES = "ignore-features";
        public static final String NUM_EDIT_TREE_STEPS = "num-edit-tree-steps";
        public static final String COPY_CONJUNCTONS = "copy-conjunctions";
        public static final String TAG_DEPENDENT = "tag-dependent";
        public static final String EDIT_TREE_MIN_COUNT = "edit-tree-min-count";
        public static final String EDIT_TREE_MAX_DEPTH = "edit-tree-max-depth";
        public static final String USE_HASH_FEATURE_TABLE = "use-hash-feature-table";
        public static final String USE_MALLET = "use-mallet";
        public static final String OFFLINE_FEATURE_EXTRACTION = "offline-feature-extraction";
        public static final String CLUSTER_FILE = "cluster-file";

        public RankerTrainerOptions() {
            this.map_.put(GENERATOR_TRAINERS, Arrays.asList(SimpleLemmatizerTrainer.class, EditTreeGeneratorTrainer.class));
            this.map_.put(USE_PERCEPTRON, true);
            this.map_.put("quadratic-penalty", Double.valueOf(CMAESOptimizer.DEFAULT_STOPFITNESS));
            this.map_.put(UNIGRAM_FILE, Arrays.asList(StringConst.EMPTY));
            this.map_.put(USE_SHAPE_LEXICON, false);
            this.map_.put(ASPELL_LANG, StringConst.EMPTY);
            this.map_.put(ASPELL_PATH, StringConst.EMPTY);
            this.map_.put(USE_CORE_FEATURES, true);
            this.map_.put(USE_ALIGNMENT_FEATURES, true);
            this.map_.put(IGNORE_FEATURES, StringConst.EMPTY);
            this.map_.put(NUM_EDIT_TREE_STEPS, 1);
            this.map_.put(COPY_CONJUNCTONS, false);
            this.map_.put("use-hash-feature-table", false);
            this.map_.put("tag-dependent", false);
            this.map_.put(EDIT_TREE_MIN_COUNT, 0);
            this.map_.put(EDIT_TREE_MAX_DEPTH, -1);
            this.map_.put(USE_MALLET, true);
            this.map_.put(OFFLINE_FEATURE_EXTRACTION, true);
            this.map_.put(CLUSTER_FILE, StringConst.EMPTY);
        }

        public RankerTrainerOptions(RankerTrainerOptions rankerTrainerOptions) {
            this.map_ = new HashMap(rankerTrainerOptions.map_);
        }

        public List<Object> getUnigramFile() {
            return (List) getOption(UNIGRAM_FILE);
        }

        public List<Object> getGeneratorTrainers() {
            return (List) getOption(GENERATOR_TRAINERS);
        }

        public boolean getUsePerceptron() {
            return ((Boolean) getOption(USE_PERCEPTRON)).booleanValue();
        }

        public double getQuadraticPenalty() {
            return ((Double) getOption("quadratic-penalty")).doubleValue();
        }

        public List<LemmaCandidateGenerator> getGenerators(List<LemmaInstance> list) {
            LinkedList linkedList = new LinkedList();
            Iterator<Object> it2 = getGeneratorTrainers().iterator();
            while (it2.hasNext()) {
                LemmaCandidateGeneratorTrainer lemmaCandidateGeneratorTrainer = (LemmaCandidateGeneratorTrainer) toInstance((Class) it2.next());
                if (lemmaCandidateGeneratorTrainer instanceof EditTreeGeneratorTrainer) {
                    lemmaCandidateGeneratorTrainer.getOptions().setOption(EditTreeGeneratorTrainer.EditTreeGeneratorTrainerOptions.NUM_STEPS, Integer.valueOf(getNumEditTreeSteps()));
                    lemmaCandidateGeneratorTrainer.getOptions().setOption("tag-dependent", Boolean.valueOf(getTagDependent()));
                    lemmaCandidateGeneratorTrainer.getOptions().setOption(EditTreeGeneratorTrainer.EditTreeGeneratorTrainerOptions.MIN_COUNT, getEditTreeMinCount());
                    lemmaCandidateGeneratorTrainer.getOptions().setOption(EditTreeGeneratorTrainer.EditTreeGeneratorTrainerOptions.MAX_DEPTH, getEditTreeMaxDepth());
                }
                linkedList.add(lemmaCandidateGeneratorTrainer.train(list, null));
            }
            return linkedList;
        }

        private Integer getEditTreeMaxDepth() {
            return (Integer) getOption(EDIT_TREE_MAX_DEPTH);
        }

        private Integer getEditTreeMinCount() {
            return (Integer) getOption(EDIT_TREE_MIN_COUNT);
        }

        public boolean getTagDependent() {
            return ((Boolean) getOption("tag-dependent")).booleanValue();
        }

        public boolean getUseShapeLexicon() {
            return ((Boolean) getOption(USE_SHAPE_LEXICON)).booleanValue();
        }

        public String getAspellPath() {
            return (String) getOption(ASPELL_PATH);
        }

        public String getAspellLang() {
            return (String) getOption(ASPELL_LANG);
        }

        public boolean getUseCoreFeatures() {
            return ((Boolean) getOption(USE_CORE_FEATURES)).booleanValue();
        }

        public boolean getUseAlignmentFeatures() {
            return ((Boolean) getOption(USE_ALIGNMENT_FEATURES)).booleanValue();
        }

        public String getIgnoreFeatures() {
            return (String) getOption(IGNORE_FEATURES);
        }

        public int getNumEditTreeSteps() {
            return ((Integer) getOption(NUM_EDIT_TREE_STEPS)).intValue();
        }

        public boolean getCopyConjunctions() {
            return ((Boolean) getOption(COPY_CONJUNCTONS)).booleanValue();
        }

        public boolean getUseHashFeatureTable() {
            return ((Boolean) getOption("use-hash-feature-table")).booleanValue();
        }

        public boolean getUseMallet() {
            return ((Boolean) getOption(USE_MALLET)).booleanValue();
        }

        public boolean getUseOfflineFeatureExtraction() {
            return ((Boolean) getOption(OFFLINE_FEATURE_EXTRACTION)).booleanValue();
        }

        public String getClusterFile() {
            return (String) getOption(CLUSTER_FILE);
        }
    }

    /**
     * Trains the candidate generators configured in the options on {@code list} and then
     * trains a ranker over their output. {@code list2} is not used by this implementation.
     */
    @Override // lemming.lemma.LemmatizerGeneratorTrainer, lemming.lemma.LemmatizerTrainer, lemming.lemma.LemmaCandidateGeneratorTrainer
    public LemmatizerGenerator train(List<LemmaInstance> list, List<LemmaInstance> list2) {
        List<LemmaCandidateGenerator> generators = this.options_.getGenerators(list);
        return trainReranker(generators, list);
    }

    /**
     * Builds ranker instances from the training data and generators, initialises a fresh
     * model with them and an edit-tree aligner, and fits the model with either the
     * perceptron or the maximum-entropy path, depending on the options.
     *
     * @param list  trained candidate generators
     * @param list2 training instances
     * @return a ranker wrapping the fitted model and the generators
     */
    private LemmatizerGenerator trainReranker(List<LemmaCandidateGenerator> list, List<LemmaInstance> list2) {
        final List<RankerInstance> rankerInstances = RankerInstance.getInstances(list2, list);
        final RankerModel model = new RankerModel();

        EditTreeAlignerTrainer alignerTrainer = new EditTreeAlignerTrainer(this.options_.getRandom(), false, 1, -1);
        final EditTreeAligner aligner = (EditTreeAligner) alignerTrainer.train(list2);

        Logger.getLogger(getClass().getName()).info("Extracting features");
        model.init(this.options_, rankerInstances, aligner);

        if (!this.options_.getUsePerceptron()) {
            runMaxEnt(model, rankerInstances);
        } else {
            runPerceptron(model, rankerInstances);
        }
        return new Ranker(model, list);
    }

    /**
     * Fits the model with the maximum-entropy objective: via Mallet when the
     * {@code use-mallet} option is set, otherwise via the SGD loop.
     */
    private void runMaxEnt(RankerModel rankerModel, List<RankerInstance> list) {
        if (!this.options_.getUseMallet()) {
            runSgd(rankerModel, list);
            return;
        }
        runMallet(rankerModel, list);
    }

    /**
     * Runs stochastic updates of the ranker objective.
     *
     * <p>Each instance is first replicated {@code min(3, (int) count)} times, so an instance
     * whose count truncates to zero is left out. The replicated list is reshuffled before
     * every pass, and the number of passes comes from the options.
     */
    private void runSgd(RankerModel rankerModel, List<RankerInstance> list) {
        LinkedList<RankerInstance> duplicated = new LinkedList<RankerInstance>();
        for (RankerInstance instance : list) {
            int copies = Math.min(3, (int) instance.getInstance().getCount());
            int added = 0;
            while (added < copies) {
                duplicated.add(instance);
                added++;
            }
        }

        Logger logger = Logger.getLogger(getClass().getName());
        logger.info(String.format("Created duplicates. Increased num instances from %d to %d.\n", list.size(), duplicated.size()));

        RankerObjective objective = new RankerObjective(this.options_, rankerModel, duplicated, 3);
        Random random = this.options_.getRandom();

        int updatesDone = 0;
        for (int pass = 0; pass < this.options_.getNumIterations(); pass++) {
            logger.info("SGD step: " + pass);
            Collections.shuffle(duplicated, random);
            for (RankerInstance instance : duplicated) {
                // Integer division: the quotient only grows once a full pass over the list has
                // been completed, so the step width is constant within a pass.
                double stepWidth = 0.1d / (1.0d + (updatesDone / duplicated.size()));
                objective.update(instance, true, stepWidth);
                updatesDone++;
            }
        }
    }

    /* JADX WARN: Type inference failed for: r5v1, types: [double[], java.io.Serializable] */
    /**
     * Optimises the ranker objective with Mallet's limited-memory BFGS.
     *
     * <p>One iteration is run first (followed by a memory report), then up to 200 further
     * single iterations until the optimizer reports convergence. If the optimizer aborts
     * with an {@link OptimizationException} or {@link IllegalArgumentException}, the abort
     * is logged and training continues with whatever state the optimizer reached.
     *
     * @param rankerModel model whose weights are handed to the objective
     * @param list        ranker instances the objective is built from
     */
    private void runMallet(RankerModel rankerModel, List<RankerInstance> list) {
        // Single source of truth for both the loop bound and the progress message.
        final int maxIterations = 200;

        Logger logger = Logger.getLogger(getClass().getName());
        double usedMemoryInMegaBytes = Sys.getUsedMemoryInMegaBytes();
        // Second figure: 64 bits per weight divided by the number of bits in a megabyte.
        logger.info(String.format("Memory usage of weights array: %g (%g) MB", Double.valueOf(Sys.getUsedMemoryInMegaBytes(rankerModel.getWeights(), false)), Double.valueOf((rankerModel.getWeights().length * 64.0d) / 8388608.0d)));
        logger.info(String.format("Memory usage: %g / %g MB", Double.valueOf(usedMemoryInMegaBytes), Double.valueOf(Sys.getMaxHeapSizeInMegaBytes())));
        logger.info("Start optimization");

        RankerObjective rankerObjective = new RankerObjective(this.options_, rankerModel, list);
        LimitedMemoryBFGS limitedMemoryBFGS = new LimitedMemoryBFGS(rankerObjective);
        // Silence the optimizer's own logger; progress is reported through ours.
        Logger.getLogger(limitedMemoryBFGS.getClass().getName()).setLevel(Level.OFF);
        rankerObjective.setParameters(rankerModel.getWeights());

        try {
            limitedMemoryBFGS.optimize(1);
            logger.info(String.format("Memory usage after first iteration: %g / %g MB", Double.valueOf(Sys.getUsedMemoryInMegaBytes()), Double.valueOf(Sys.getMaxHeapSizeInMegaBytes())));
            for (int i = 0; i < maxIterations; i++) {
                if (limitedMemoryBFGS.isConverged()) {
                    break;
                }
                limitedMemoryBFGS.optimize(1);
                logger.info(String.format("Iteration: %3d / %3d", Integer.valueOf(i + 1), Integer.valueOf(maxIterations)));
            }
        } catch (OptimizationException e) {
            // Best effort: keep the current state, but leave a trace instead of failing silently.
            logger.log(Level.WARNING, "Optimization stopped early; continuing with current weights", e);
        } catch (IllegalArgumentException e2) {
            logger.log(Level.WARNING, "Optimization stopped early; continuing with current weights", e2);
        }
        logger.info("Finished optimization");
    }

    /**
     * Trains the model with perceptron updates.
     *
     * <p>For the number of passes given by the options, the instance list is shuffled in
     * place and every instance whose selected lemma differs from its gold lemma triggers a
     * -1 update for the selected lemma and a +1 update for the gold lemma. When averaging is
     * enabled, the same pair of updates is additionally applied to a second weight array,
     * scaled by the number of instances not yet processed in the current pass; at the end of
     * each pass the primary weights are overwritten from that array.
     *
     * <p>After every pass the count-weighted training accuracy is logged.
     *
     * @param rankerModel model to update
     * @param list        ranker instances; reordered by this method
     */
    private void runPerceptron(RankerModel rankerModel, List<RankerInstance> list) {
        Logger logger = Logger.getLogger(getClass().getName());
        double[] weights = rankerModel.getWeights();
        double[] accumulated = null;
        if (this.options_.getAveraging()) {
            accumulated = new double[weights.length];
        }
        for (int pass = 0; pass < this.options_.getNumIterations(); pass++) {
            double errorCount = 0.0d;
            double totalCount = 0.0d;
            int processed = 0;
            Collections.shuffle(list, this.options_.getRandom());
            for (RankerInstance rankerInstance : list) {
                String predicted = rankerModel.select(rankerInstance);
                if (!predicted.equals(rankerInstance.getInstance().getLemma())) {
                    rankerModel.update(rankerInstance, predicted, -1.0d);
                    rankerModel.update(rankerInstance, rankerInstance.getInstance().getLemma(), 1.0d);
                    if (accumulated != null) {
                        double remaining = list.size() - processed;
                        // Hand-expanded assertion "remaining > 0"; the threshold is a plain 0.0
                        // rather than an unrelated library constant of the same value.
                        if (!$assertionsDisabled && remaining <= 0.0d) {
                            throw new AssertionError();
                        }
                        // Temporarily point the model at the accumulator, apply the scaled
                        // updates, then restore the primary weight array.
                        rankerModel.setWeights(accumulated);
                        rankerModel.update(rankerInstance, predicted, -remaining);
                        rankerModel.update(rankerInstance, rankerInstance.getInstance().getLemma(), remaining);
                        rankerModel.setWeights(weights);
                    }
                    errorCount += rankerInstance.getInstance().getCount();
                }
                totalCount += rankerInstance.getInstance().getCount();
                processed++;
            }
            if (accumulated != null) {
                double scale = 1.0d / ((pass + 1.0d) * list.size());
                double carryOver = (pass + 2.0d) / (pass + 1.0d);
                for (int k = 0; k < weights.length; k++) {
                    weights[k] = accumulated[k] * scale;
                    accumulated[k] = accumulated[k] * carryOver;
                }
            }
            logger.info(String.format("Train Accuracy: %g / %g = %g", Double.valueOf(totalCount - errorCount), Double.valueOf(totalCount), Double.valueOf(((totalCount - errorCount) * 100.0d) / totalCount)));
        }
    }

    /** Returns the option set currently used by this trainer. */
    @Override // lemming.lemma.LemmatizerTrainer, lemming.lemma.LemmaCandidateGeneratorTrainer
    public LemmaOptions getOptions() {
        final RankerTrainerOptions current = options_;
        return current;
    }

    /**
     * Replaces the option set used by this trainer.
     *
     * @param rankerTrainerOptions options to use from now on; stored as given, not copied
     */
    public void setOptions(RankerTrainerOptions rankerTrainerOptions) {
        options_ = rankerTrainerOptions;
    }

    /**
     * Bridge overload (marked bridge/synthetic by the decompiler) that exposes training
     * under the {@link Lemmatizer} return type. It only casts its raw list arguments and
     * delegates to the typed {@code train(List<LemmaInstance>, List<LemmaInstance>)}.
     */
    @Override // lemming.lemma.LemmatizerTrainer, lemming.lemma.LemmaCandidateGeneratorTrainer
    public /* bridge */ /* synthetic */ Lemmatizer train(List list, List list2) {
        return train((List<LemmaInstance>) list, (List<LemmaInstance>) list2);
    }

    /**
     * Bridge overload (marked bridge/synthetic by the decompiler) that exposes training
     * under the {@link LemmaCandidateGenerator} return type. It only casts its raw list
     * arguments and delegates to the typed {@code train(List<LemmaInstance>, List<LemmaInstance>)}.
     */
    @Override // lemming.lemma.LemmaCandidateGeneratorTrainer
    public /* bridge */ /* synthetic */ LemmaCandidateGenerator train(List list, List list2) {
        return train((List<LemmaInstance>) list, (List<LemmaInstance>) list2);
    }

    // Initialises the assertion switch: it is true exactly when assertions are NOT desired
    // for this class, which makes the hand-expanded assertion in runPerceptron a no-op.
    static {
        $assertionsDisabled = !RankerTrainer.class.desiredAssertionStatus();
    }
}
