/*
 * Decompiled with CFR 0.152.
 */
package org.elasticsearch.xpack.inference.chunking;

import com.ibm.icu.text.BreakIterator;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import org.elasticsearch.xpack.inference.chunking.WordBoundaryChunker;

/**
 * Splits text into chunks, preferring to break on sentence boundaries.
 *
 * <p>Sentences are located with an ICU sentence {@link BreakIterator} and
 * accumulated into the current chunk until adding the next sentence would
 * push the chunk's word count over the requested maximum. A single sentence
 * that is itself over the maximum is handed to {@link WordBoundaryChunker}
 * and split on word boundaries with an overlap of
 * {@code (maxNumberWordsPerChunk - 1) / 2} words.
 *
 * <p>Both iterators are instance fields that {@link #chunk(String, int)}
 * re-targets at its input on every call, so an instance carries mutable
 * state between calls. NOTE(review): this suggests one instance should not be
 * used by several threads at once - confirm against callers.
 */
public class SentenceBoundaryChunker {
    private final BreakIterator sentenceIterator = BreakIterator.getSentenceInstance(Locale.ROOT);
    private final BreakIterator wordIterator = BreakIterator.getWordInstance(Locale.ROOT);

    /**
     * Breaks {@code input} into chunks made of whole sentences where possible.
     *
     * @param input                  the text to chunk
     * @param maxNumberWordsPerChunk word budget for a chunk; a sentence with more
     *                               words than this is split on word boundaries
     * @return the chunks in input order; empty when the sentence iterator finds
     *         no boundary or no words were counted
     */
    public List<String> chunk(String input, int maxNumberWordsPerChunk) {
        List<String> chunks = new ArrayList<>();
        sentenceIterator.setText(input);
        wordIterator.setText(input);

        int chunkStart = 0;
        int chunkEnd = 0;
        int sentenceStart = 0;
        int chunkWordCount = 0;

        int boundary = sentenceIterator.next();
        while (boundary != BreakIterator.DONE) {
            int sentenceEnd = sentenceIterator.current();
            int countWordsInSentence = countWords(sentenceStart, sentenceEnd);

            if (chunkWordCount + countWordsInSentence > maxNumberWordsPerChunk) {
                // This sentence does not fit: close the chunk at the previous sentence end.
                if (chunkWordCount > 0) {
                    chunks.add(input.substring(chunkStart, chunkEnd));
                    chunkStart = chunkEnd;
                    // The new chunk starts out holding just this sentence.
                    chunkWordCount = countWordsInSentence;
                }

                if (countWordsInSentence > maxNumberWordsPerChunk) {
                    // The sentence alone is over budget: split it on word boundaries.
                    List<WordBoundaryChunker.ChunkPosition> sentenceSplits = splitLongSentence(
                        input.substring(chunkStart, sentenceEnd),
                        maxNumberWordsPerChunk,
                        overlapForChunkSize(maxNumberWordsPerChunk)
                    );

                    // Positions are relative to the substring passed above, so each
                    // one is shifted by chunkStart to index into the full input.
                    int lastSplitIndex = sentenceSplits.size() - 1;
                    for (int i = 0; i < lastSplitIndex; i++) {
                        WordBoundaryChunker.ChunkPosition split = sentenceSplits.get(i);
                        chunks.add(input.substring(chunkStart + split.start(), chunkStart + split.end()));
                    }

                    // The final split is not emitted yet; it becomes the start of the
                    // chunk in progress so following sentences can be appended to it.
                    WordBoundaryChunker.ChunkPosition finalSplit = sentenceSplits.get(lastSplitIndex);
                    chunkStart += finalSplit.start();
                    chunkWordCount = finalSplit.wordCount();
                }
            } else {
                chunkWordCount += countWordsInSentence;
            }

            sentenceStart = sentenceEnd;
            chunkEnd = sentenceEnd;
            boundary = sentenceIterator.next();
        }

        // Flush whatever is left, running to the end of the input.
        if (chunkWordCount > 0) {
            chunks.add(input.substring(chunkStart));
        }
        return chunks;
    }

    /**
     * Delegates to a new {@link WordBoundaryChunker} to split {@code text} on
     * word boundaries.
     *
     * @param text             the over-long sentence (plus any text preceding it in the chunk)
     * @param maxNumberOfWords chunk size passed to the word chunker
     * @param overlap          overlap passed to the word chunker
     * @return chunk positions relative to the start of {@code text}
     */
    static List<WordBoundaryChunker.ChunkPosition> splitLongSentence(String text, int maxNumberOfWords, int overlap) {
        return new WordBoundaryChunker().chunkPositions(text, maxNumberOfWords, overlap);
    }

    /** Counts words between {@code start} and {@code end} using this instance's word iterator. */
    private int countWords(int start, int end) {
        return countWords(start, end, wordIterator);
    }

    /**
     * Counts the word boundaries from the one preceding {@code start} up to and
     * including {@code end} whose rule status is not {@link BreakIterator#WORD_NONE}.
     *
     * <p>The iterator is repositioned by this call. {@code start < end} is only
     * checked when assertions are enabled.
     *
     * <p>NOTE(review): the first boundary inspected lies before {@code start};
     * confirm that a word ending exactly at {@code start} cannot be counted here
     * as well as in the preceding range.
     *
     * @param start        offset where the range begins
     * @param end          offset where the range ends (inclusive as a boundary)
     * @param wordIterator word iterator already set to the text being measured
     * @return number of boundaries in the range with a non-{@code WORD_NONE} status
     */
    static int countWords(int start, int end, BreakIterator wordIterator) {
        assert start < end;
        // Only the repositioning matters; the position is read back via current().
        wordIterator.preceding(start);

        int boundary = wordIterator.current();
        int wordCount = 0;
        while (boundary != BreakIterator.DONE && boundary <= end) {
            if (wordIterator.getRuleStatus() != BreakIterator.WORD_NONE) {
                wordCount++;
            }
            boundary = wordIterator.next();
        }
        return wordCount;
    }

    /** Overlap used when splitting an over-long sentence: {@code (chunkSize - 1) / 2}, integer division. */
    private static int overlapForChunkSize(int chunkSize) {
        return (chunkSize - 1) / 2;
    }
}

