/*
 * Decompiled with CFR 0.152.
 */
package org.languagetool.dev;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
import org.languagetool.JLanguageTool;
import org.languagetool.Language;
import org.languagetool.language.AmericanEnglish;
import org.languagetool.languagemodel.LuceneSingleIndexLanguageModel;
import org.languagetool.rules.ConfusionSetLoader;

/**
 * Command-line tool that scans the 3-gram Lucene index and prints, for every
 * term of a confusion set, the 3-grams that have the term as their middle word
 * together with their occurrence counts.
 *
 * <p>Results are written to {@code System.out} as tab-separated lines, progress
 * and status messages go to {@code System.err}.
 */
class HomophoneOccurrenceDumper
extends LuceneSingleIndexLanguageModel {
    /** 3-grams occurring fewer times than this are not printed by {@link #dumpOccurrences(Set)}. */
    private static final int MIN_COUNT = 1000;
    /** A progress line is written to stderr every this many scanned index terms. */
    private static final int PROGRESS_INTERVAL = 10000;
    /** Name of the index field whose terms are iterated. */
    private static final String NGRAM_FIELD = "ngram";
    /** The n-gram order this tool works on. */
    private static final int NGRAM_SIZE = 3;

    HomophoneOccurrenceDumper(File topIndexDir) throws IOException {
        super(topIndexDir);
    }

    /**
     * Collects all 3-grams from the index that contain one of the given tokens
     * surrounded by spaces, i.e. as the middle word. This iterates over every
     * term of the 3-gram index.
     *
     * @param tokens the words to look for, must not be {@code null}
     * @return map from the 3-gram (words separated by single spaces) to its count
     * @throws IOException if reading the index fails
     */
    Map<String, Long> getContext(String ... tokens) throws IOException {
        Objects.requireNonNull(tokens);
        TermsEnum iterator = this.getIterator();
        Map<String, Long> result = new HashMap<>();
        BytesRef byteRef;
        while ((byteRef = iterator.next()) != null) {
            // Lucene stores terms as UTF-8, so decode explicitly instead of
            // relying on the platform default charset.
            String term = byteRef.utf8ToString();
            String[] split = term.split(" ");
            if (split.length != NGRAM_SIZE) {
                continue;
            }
            for (String token : tokens) {
                if (term.contains(" " + token + " ")) {
                    long count = this.getCount(Arrays.asList(split[0], split[1], split[2]));
                    result.put(term, count);
                    // The entry depends only on the term, so further matching
                    // tokens would just repeat the same lookup.
                    break;
                }
            }
        }
        return result;
    }

    /**
     * Loads the confusion pairs from the given resource and dumps the
     * occurrences of all their terms.
     *
     * @param confusionSetPath path of the confusion set file in the resource directory
     * @throws IOException if the resource or the index cannot be read
     */
    private void run(String confusionSetPath) throws IOException {
        System.err.println("Loading confusion sets from " + confusionSetPath + ", minimum occurrence: " + MIN_COUNT);
        ConfusionSetLoader confusionSetLoader = new ConfusionSetLoader((Language)AmericanEnglish.getInstance());
        Set<String> confusionTerms;
        // Close the resource stream once the pairs have been loaded.
        try (InputStream inputStream = JLanguageTool.getDataBroker().getFromResourceDirAsStream(confusionSetPath)) {
            Map<String, ?> confusionPairs = confusionSetLoader.loadConfusionPairs(inputStream);
            confusionTerms = confusionPairs.keySet();
        }
        this.dumpOccurrences(confusionTerms);
    }

    /**
     * Iterates over all terms of the 3-gram index and prints every 3-gram whose
     * middle word is contained in {@code tokens} and whose count is at least
     * {@link #MIN_COUNT}. Output format: {@code token<TAB>count<TAB>3-gram}.
     *
     * @param tokens the middle words of interest, must not be {@code null}
     * @throws IOException if reading the index fails
     */
    private void dumpOccurrences(Set<String> tokens) throws IOException {
        Objects.requireNonNull(tokens);
        TermsEnum iterator = this.getIterator();
        BytesRef byteRef;
        // long rather than int so the counter cannot overflow on very large indexes
        long i = 0;
        while ((byteRef = iterator.next()) != null) {
            String term = byteRef.utf8ToString();
            String[] split = term.split(" ");
            if (split.length == NGRAM_SIZE) {
                String token = split[1];
                if (tokens.contains(token)) {
                    long count = this.getCount(Arrays.asList(split[0], split[1], split[2]));
                    if (count >= MIN_COUNT) {
                        System.out.println(token + "\t" + count + "\t" + split[0] + " " + split[1] + " " + split[2]);
                    }
                }
            }
            if (i % PROGRESS_INTERVAL == 0) {
                System.err.println(i + "...");
            }
            ++i;
        }
    }

    /**
     * Creates an iterator over all terms of the {@code ngram} field of the
     * 3-gram index.
     *
     * @throws IOException if the index cannot be read or has no such field
     */
    private TermsEnum getIterator() throws IOException {
        LuceneSingleIndexLanguageModel.LuceneSearcher luceneSearcher = this.getLuceneSearcher(NGRAM_SIZE);
        Fields fields = MultiFields.getFields((IndexReader)luceneSearcher.getReader());
        // Both lookups may yield null when the index has no (such) field; fail
        // with a clear message instead of a NullPointerException.
        Terms terms = fields == null ? null : fields.terms(NGRAM_FIELD);
        if (terms == null) {
            throw new IOException("No '" + NGRAM_FIELD + "' field found in the " + NGRAM_SIZE + "-gram index");
        }
        return terms.iterator();
    }

    /**
     * Entry point. Expects exactly one argument, the top-level index directory;
     * otherwise prints a usage message and exits with status 1.
     */
    public static void main(String[] args) throws IOException {
        if (args.length != 1) {
            System.out.println("Usage: " + HomophoneOccurrenceDumper.class.getSimpleName() + " <indexDir>");
            System.exit(1);
        }
        try (HomophoneOccurrenceDumper dumper = new HomophoneOccurrenceDumper(new File(args[0]))) {
            dumper.run("/en/confusion_sets.txt");
        }
    }

    /**
     * Not supported by this tool.
     *
     * @throws UnsupportedOperationException always
     */
    public long getTotalTokenCount() {
        throw new UnsupportedOperationException("not implemented");
    }
}

