package edu.stanford.nlp.scoref;

import edu.stanford.nlp.hcoref.data.CorefCluster;
import edu.stanford.nlp.hcoref.data.Document;
import edu.stanford.nlp.hcoref.data.Mention;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.util.Pair;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.stream.Collectors;

/* loaded from: input_file:edu/stanford/nlp/scoref/DatasetBuilder.class */
/**
 * Document processor that collects labeled mention pairs per document and writes them out
 * as a single dataset when processing finishes.
 *
 * <p>For each document the candidate pairs come from
 * {@link StatisticalCorefUtils#getUnlabeledMentionPairs(Document)}; a pair is labeled
 * {@code true} when both of its mention ids belong to the same cluster in
 * {@code document.goldCorefClusters}. The per-document map is then optionally thinned in two
 * steps: negatives are downsampled to honor a minimum positive ratio, and whole mentions are
 * dropped at random until the document is within the example cap.
 *
 * <p>All random choices draw from one generator seeded with {@code 0}.
 */
public class DatasetBuilder implements DocumentProcessor {
    /** Upper bound on the number of labeled pairs retained for one document. */
    private final int maxExamplesPerDocument;
    /** Minimum fraction of positive pairs a document should have after downsampling negatives. */
    private final double minClassImbalancedPerDocument;
    /** Document id -> (mention pair -> coreferent label). */
    private final Map<Integer, Map<Pair<Integer, Integer>, Boolean>> mentionPairs;
    /** Fixed-seed generator shared by every sampling step. */
    private final Random random;

    /** Creates a builder that applies no downsampling and no per-document cap. */
    public DatasetBuilder() {
        this(0.0d, Integer.MAX_VALUE);
    }

    /**
     * @param d minimum fraction of positive pairs per document; negatives are discarded at
     *          random until the fraction is reached ({@code 0} disables the step)
     * @param i maximum number of pairs kept per document
     */
    public DatasetBuilder(double d, int i) {
        this.maxExamplesPerDocument = i;
        this.minClassImbalancedPerDocument = d;
        this.mentionPairs = new HashMap<>();
        this.random = new Random(0L);
    }

    /**
     * Labels, thins and stores the mention pairs of one document.
     *
     * @param i        id under which the document's pairs are stored
     * @param document document supplying the candidate pairs and the gold clusters
     */
    @Override // edu.stanford.nlp.scoref.DocumentProcessor
    public void process(int i, Document document) {
        Map<Pair<Integer, Integer>, Boolean> labeledPairs =
                StatisticalCorefUtils.getUnlabeledMentionPairs(document);
        markGoldPairs(document, labeledPairs);
        downsampleNegatives(labeledPairs);
        capExamples(labeledPairs);
        this.mentionPairs.put(i, labeledPairs);
    }

    /** Sets the label to {@code true} for every candidate pair whose mentions share a gold cluster. */
    private static void markGoldPairs(Document document, Map<Pair<Integer, Integer>, Boolean> labeledPairs) {
        for (CorefCluster cluster : document.goldCorefClusters.values()) {
            List<Mention> mentions = new ArrayList<>(cluster.getCorefMentions());
            // Pairs are ordered, so every (a, b) combination has to be probed in both directions.
            for (Mention left : mentions) {
                for (Mention right : mentions) {
                    Pair<Integer, Integer> pair = new Pair<>(left.mentionID, right.mentionID);
                    // Only relabel existing candidates; never introduce new pairs.
                    if (labeledPairs.containsKey(pair)) {
                        labeledPairs.put(pair, true);
                    }
                }
            }
        }
    }

    /** Randomly removes negative pairs until positives make up at least the configured fraction. */
    private void downsampleNegatives(Map<Pair<Integer, Integer>, Boolean> labeledPairs) {
        long positiveCount = 0;
        List<Pair<Integer, Integer>> negatives = new ArrayList<>();
        for (Map.Entry<Pair<Integer, Integer>, Boolean> entry : labeledPairs.entrySet()) {
            if (entry.getValue()) {
                positiveCount++;
            } else {
                negatives.add(entry.getKey());
            }
        }

        // With no pairs at all the ratio is NaN, the comparison is false, and nothing is removed.
        float positiveRatio = ((float) positiveCount) / ((float) (positiveCount + negatives.size()));
        if (positiveRatio < this.minClassImbalancedPerDocument) {
            // Number of negatives that yields exactly the requested ratio. A requested ratio
            // above 1 produces a negative value, so clamp to "keep none" instead of indexing
            // the list with a negative offset.
            int negativesToKeep = Math.max(0,
                    (int) ((positiveCount / this.minClassImbalancedPerDocument) - positiveCount));
            // Shuffle with the seeded generator so the selection does not depend on an
            // unseeded global random source.
            Collections.shuffle(negatives, this.random);
            for (int idx = negativesToKeep; idx < negatives.size(); idx++) {
                labeledPairs.remove(negatives.get(idx));
            }
        }
    }

    /**
     * Enforces the per-document cap by repeatedly picking a random mention (the second element
     * of a pair) and removing every pair that ends in it.
     */
    private void capExamples(Map<Pair<Integer, Integer>, Boolean> labeledPairs) {
        Map<Integer, List<Integer>> antecedentsByMention = new HashMap<>();
        for (Pair<Integer, Integer> pair : labeledPairs.keySet()) {
            antecedentsByMention.computeIfAbsent(pair.second, key -> new ArrayList<>()).add(pair.first);
        }

        List<Integer> mentions = new ArrayList<>(antecedentsByMention.keySet());
        // The emptiness check keeps a negative cap from calling nextInt(0) once every
        // mention has already been removed.
        while (labeledPairs.size() > this.maxExamplesPerDocument && !mentions.isEmpty()) {
            Integer mention = mentions.remove(this.random.nextInt(mentions.size()));
            for (Integer antecedent : antecedentsByMention.get(mention)) {
                labeledPairs.remove(new Pair<>(antecedent, mention));
            }
        }
    }

    /**
     * Writes the accumulated per-document pair map to {@code StatisticalCorefTrainer.datasetFile}
     * through {@link IOUtils#writeObjectToFile}.
     *
     * @throws Exception if writing the file fails
     */
    @Override // edu.stanford.nlp.scoref.DocumentProcessor
    public void finish() throws Exception {
        IOUtils.writeObjectToFile(this.mentionPairs, StatisticalCorefTrainer.datasetFile);
    }
}
