/*
 * Decompiled with CFR 0.152.
 */
package org.elasticsearch.xpack.ml.inference.nlp.tokenizers;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import java.util.SortedMap;
import org.elasticsearch.xpack.core.ml.inference.trainedmodel.Tokenization;
import org.elasticsearch.xpack.ml.inference.nlp.tokenizers.BertTokenizer;
import org.elasticsearch.xpack.ml.inference.nlp.tokenizers.JapaneseWordPieceAnalyzer;
import org.elasticsearch.xpack.ml.inference.nlp.tokenizers.WordPieceAnalyzer;

/**
 * A {@link BertTokenizer} variant whose word-piece analysis is performed by a
 * {@link JapaneseWordPieceAnalyzer}.
 *
 * <p>Instances are obtained through {@link #builder(List, Tokenization)}.
 */
public class BertJapaneseTokenizer extends BertTokenizer {

    /**
     * Forwards every argument, unchanged and in the same order, to the
     * {@link BertTokenizer} constructor.
     */
    protected BertJapaneseTokenizer(
        List<String> originalVocab,
        SortedMap<String, Integer> vocab,
        boolean doLowerCase,
        boolean doTokenizeCjKChars,
        boolean doStripAccents,
        boolean withSpecialTokens,
        int maxSequenceLength,
        Set<String> neverSplit
    ) {
        super(
            originalVocab,
            vocab,
            doLowerCase,
            doTokenizeCjKChars,
            doStripAccents,
            withSpecialTokens,
            maxSequenceLength,
            neverSplit
        );
    }

    /**
     * Creates the {@link JapaneseWordPieceAnalyzer} used by this tokenizer.
     *
     * <p>The analyzer receives a fresh {@link ArrayList} copy of {@code neverSplit}
     * rather than the caller's list. {@code doTokenizeCjKChars} is accepted to
     * satisfy the overridden signature but is not passed on to the analyzer.
     *
     * @param vocabulary         vocabulary handed to the analyzer
     * @param neverSplit         tokens handed to the analyzer (copied first)
     * @param doLowerCase        lower-casing flag handed to the analyzer
     * @param doTokenizeCjKChars not used by this implementation
     * @param doStripAccents     accent-stripping flag handed to the analyzer
     * @param unknownToken       unknown-token string handed to the analyzer
     * @return a new {@link JapaneseWordPieceAnalyzer}
     */
    @Override
    protected WordPieceAnalyzer createWordPieceAnalyzer(
        List<String> vocabulary,
        List<String> neverSplit,
        boolean doLowerCase,
        boolean doTokenizeCjKChars,
        boolean doStripAccents,
        String unknownToken
    ) {
        List<String> neverSplitCopy = new ArrayList<>(neverSplit);
        return new JapaneseWordPieceAnalyzer(vocabulary, neverSplitCopy, doLowerCase, doStripAccents, unknownToken);
    }

    /**
     * Returns a builder that produces {@link BertJapaneseTokenizer} instances.
     *
     * @param vocab        vocabulary passed to the builder
     * @param tokenization tokenization configuration passed to the builder
     * @return a new {@link JapaneseBuilder}
     */
    public static BertTokenizer.Builder builder(List<String> vocab, Tokenization tokenization) {
        return new JapaneseBuilder(vocab, tokenization);
    }

    /**
     * {@link BertTokenizer.Builder} specialisation whose {@link #build()} creates a
     * {@link BertJapaneseTokenizer}.
     */
    public static class JapaneseBuilder extends BertTokenizer.Builder {

        /** Passes both arguments straight to the parent builder's constructor. */
        protected JapaneseBuilder(List<String> vocab, Tokenization tokenization) {
            super(vocab, tokenization);
        }

        /**
         * Fills in any options left unset and constructs the tokenizer.
         *
         * <p>The defaults are written back to this builder's fields before the
         * tokenizer is created.
         *
         * @return a new {@link BertJapaneseTokenizer}
         */
        @Override
        public BertTokenizer build() {
            // When accent stripping was not configured, it follows the lower-casing setting.
            if (doStripAccents == null) {
                doStripAccents = doLowerCase;
            }
            // An absent never-split set is treated as "no protected tokens".
            if (neverSplit == null) {
                neverSplit = Collections.emptySet();
            }
            return new BertJapaneseTokenizer(
                originalVocab,
                vocab,
                doLowerCase,
                doTokenizeCjKChars,
                doStripAccents,
                withSpecialTokens,
                maxSequenceLength,
                neverSplit
            );
        }
    }
}

