/*
 * Decompiled with CFR 0.152.
 */
package org.elasticsearch.xpack.ml.inference.nlp.tokenizers;

import java.util.LinkedList;
import java.util.Objects;
import org.apache.lucene.analysis.CharArraySet;
import org.elasticsearch.xpack.ml.inference.nlp.tokenizers.CharTrie;
import org.elasticsearch.xpack.ml.inference.nlp.tokenizers.DelimitedToken;

final class TokenizerUtils {
    /** Private so the class cannot be instantiated; its methods are static. */
    private TokenizerUtils() {
    }

    /**
     * Splits {@code input} into consecutive tokens so that every recognised never-split sequence
     * becomes a token of its own and the text around those sequences is emitted as separate tokens.
     *
     * <p>The input is scanned left to right while walking {@code neverSplit} one character at a time.
     * A candidate is complete when the walk reaches a node whose {@code isLeaf()} is true; it is
     * emitted only if {@code neverSplitSet} also contains it. When the walk fails part-way through a
     * candidate, scanning resumes one character after the position where that candidate began, so
     * candidates that start inside a failed partial match (including single-character ones) are
     * still found.
     *
     * @param input         the text to split
     * @param neverSplit    root of the trie used to detect candidate never-split sequences
     * @param neverSplitSet set consulted to confirm a candidate before it is emitted
     * @return the tokens in input order; text that precedes a confirmed never-split token, and any
     *         text remaining after the last emitted token, is returned as a token as well. An empty
     *         input yields an empty list.
     */
    static LinkedList<DelimitedToken> splitOutNeverSplit(CharSequence input, CharTrie neverSplit, CharArraySet neverSplitSet) {
        LinkedList<DelimitedToken> bigTokens = new LinkedList<>();
        final int inputLength = input.length();
        CharTrie current = neverSplit;
        // End of the last emitted token, i.e. where the next "in between" token would begin.
        int windowStart = 0;
        // Position at which the candidate currently being matched began.
        int neverSplitStart = 0;
        for (int i = 0; i < inputLength; i++) {
            // Boxed up front so the lookup is made with a Character key, exactly as before.
            Character key = input.charAt(i);
            CharTrie childNode = current.children().get(key);
            if (childNode == null) {
                if (current != neverSplit) {
                    // The partial match failed. Restart from the root and rewind so the loop increment
                    // resumes at neverSplitStart + 1; neverSplitStart grows on every restart, so the
                    // scan always terminates.
                    current = neverSplit;
                    i = neverSplitStart;
                }
                continue;
            }
            if (current == neverSplit) {
                // First character of a new candidate.
                neverSplitStart = i;
            }
            if (childNode.isLeaf() == false) {
                // Still inside a potential never-split sequence.
                current = childNode;
                continue;
            }
            // Build a view over the candidate and verify that it really is a never-split entry.
            CharSequenceRef maybeNeverSplit = new CharSequenceRef(input, neverSplitStart, i + 1 - neverSplitStart);
            if (neverSplitSet.contains(maybeNeverSplit)) {
                if (windowStart < neverSplitStart) {
                    bigTokens.add(
                        new DelimitedToken(
                            new CharSequenceRef(input, windowStart, neverSplitStart - windowStart),
                            windowStart,
                            neverSplitStart
                        )
                    );
                }
                bigTokens.add(new DelimitedToken(maybeNeverSplit, neverSplitStart, i + 1));
                // Only move the window once a token was actually emitted; a rejected candidate stays
                // part of the surrounding text instead of being dropped from the output.
                windowStart = i + 1;
            }
            current = neverSplit;
        }
        // Emit whatever follows the last emitted token (or the whole input if nothing was emitted).
        int finalIndex = bigTokens.isEmpty() ? 0 : bigTokens.getLast().endOffset();
        if (finalIndex < inputLength) {
            bigTokens.add(new DelimitedToken(new CharSequenceRef(input, finalIndex, inputLength - finalIndex), finalIndex, inputLength));
        }
        return bigTokens;
    }

    /**
     * Returns how many bytes the UTF-8 encoding of the code point {@code c} occupies.
     *
     * <p>No validation is performed: every value below {@code 0x80} (negative values included) maps
     * to 1 and every value of {@code 0x10000} or more maps to 4.
     *
     * @param c the code point
     * @return 1, 2, 3 or 4
     */
    static int numUtf8Bytes(int c) {
        // Thresholds are the first code points that need two, three and four bytes respectively.
        return c < 0x80 ? 1
            : c < 0x800 ? 2
            : c < 0x10000 ? 3
            : 4;
    }

    /**
     * A view of {@code len} characters of {@code wrapped}, starting at {@code offset}. No characters
     * are copied until {@link #subSequence(int, int)} or {@link #toString()} is called.
     *
     * <p>All indices accepted by this type are relative to the view and are checked against
     * {@code len}, so characters of {@code wrapped} outside the view cannot be reached through it.
     * Being a record, {@code equals} and {@code hashCode} compare the three components rather than
     * the viewed characters.
     *
     * @param wrapped the underlying character sequence
     * @param offset  index in {@code wrapped} of the first character of the view
     * @param len     number of characters in the view
     */
    public record CharSequenceRef(CharSequence wrapped, int offset, int len) implements CharSequence {

        /** Returns the index in the wrapped sequence at which this view starts. */
        public int getOffset() {
            return this.offset;
        }

        @Override
        public int length() {
            return this.len;
        }

        /**
         * @throws IndexOutOfBoundsException if {@code index} is negative or not less than {@link #length()}
         */
        @Override
        public char charAt(int index) {
            // Without this check an index past the view silently read the wrapped sequence instead.
            Objects.checkIndex(index, this.len);
            return this.wrapped.charAt(index + this.offset);
        }

        /**
         * @throws IndexOutOfBoundsException if {@code start} or {@code end} is negative, if
         *         {@code start} is greater than {@code end}, or if {@code end} is greater than
         *         {@link #length()}
         */
        @Override
        public CharSequence subSequence(int start, int end) {
            Objects.checkFromToIndex(start, end, this.len);
            return this.wrapped.subSequence(start + this.offset, end + this.offset);
        }

        /** Returns the viewed characters as a {@code String}. */
        @Override
        public String toString() {
            return this.wrapped.subSequence(this.offset, this.offset + this.len).toString();
        }
    }
}

