/*
 * Decompiled with CFR 0.152.
 */
package org.elasticsearch.xpack.inference.chunking;

import com.ibm.icu.text.BreakIterator;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import org.elasticsearch.common.Strings;
import org.elasticsearch.inference.ChunkingSettings;
import org.elasticsearch.xpack.inference.chunking.Chunker;
import org.elasticsearch.xpack.inference.chunking.ChunkerUtils;
import org.elasticsearch.xpack.inference.chunking.SentenceBoundaryChunkingSettings;
import org.elasticsearch.xpack.inference.chunking.WordBoundaryChunker;

/**
 * Splits text into chunks on sentence boundaries, keeping each chunk at or below a
 * maximum word count where possible. A sentence that on its own exceeds the maximum
 * is split on word boundaries using {@link WordBoundaryChunker}.
 *
 * <p>The sentence and word iterators are instance state that is re-targeted at the
 * input on every call to {@code chunk}, so a single instance should not be used by
 * several threads at the same time.
 */
public class SentenceBoundaryChunker implements Chunker {

    /** Upper limit, in words, applied by both overlap calculations in this class. */
    private static final int MAX_OVERLAP_WORDS = 20;

    private final BreakIterator sentenceIterator = BreakIterator.getSentenceInstance(Locale.ROOT);
    private final BreakIterator wordIterator = BreakIterator.getWordInstance(Locale.ROOT);

    /**
     * Chunks {@code input} using the supplied settings, which must be
     * {@link SentenceBoundaryChunkingSettings}. The preceding sentence is included as
     * overlap when the settings' {@code sentenceOverlap} is greater than zero.
     *
     * @param input            the text to chunk
     * @param chunkingSettings the settings to apply
     * @return the offsets of each chunk within {@code input}
     * @throws IllegalArgumentException if {@code chunkingSettings} is {@code null} or is
     *                                  not a {@link SentenceBoundaryChunkingSettings}
     */
    @Override
    public List<Chunker.ChunkOffset> chunk(String input, ChunkingSettings chunkingSettings) {
        if (chunkingSettings instanceof SentenceBoundaryChunkingSettings) {
            SentenceBoundaryChunkingSettings settings = (SentenceBoundaryChunkingSettings) chunkingSettings;
            return chunk(input, settings.maxChunkSize, settings.sentenceOverlap > 0);
        }
        // Guard the dereference so a null argument is reported as an illegal argument
        // rather than failing with a NullPointerException while building the message.
        Object strategy = chunkingSettings == null ? null : chunkingSettings.getChunkingStrategy();
        throw new IllegalArgumentException(
            Strings.format("SentenceBoundaryChunker can't use ChunkingSettings with strategy [%s]", strategy)
        );
    }

    /**
     * Chunks {@code input} on sentence boundaries.
     *
     * <p>Sentences are accumulated into the current chunk until adding the next one
     * would exceed {@code maxNumberWordsPerChunk}; the accumulated chunk is then emitted
     * and a new one is started. If nothing was chunked, a single chunk spanning the
     * whole input is returned.
     *
     * @param input                    the text to chunk
     * @param maxNumberWordsPerChunk   the maximum number of words in a chunk
     * @param includePrecedingSentence whether a new chunk starts with (part of) the
     *                                 sentence preceding the one that overflowed
     * @return the offsets of each chunk within {@code input}; never empty
     */
    public List<Chunker.ChunkOffset> chunk(String input, int maxNumberWordsPerChunk, boolean includePrecedingSentence) {
        List<Chunker.ChunkOffset> chunks = new ArrayList<>();

        this.sentenceIterator.setText(input);
        this.wordIterator.setText(input);

        int chunkStart = 0;
        int chunkEnd = 0;
        int sentenceStart = 0;
        int chunkWordCount = 0;
        int wordsInPrecedingSentenceCount = 0;
        int previousSentenceStart = 0;

        int boundary = this.sentenceIterator.next();
        while (boundary != BreakIterator.DONE) {
            int sentenceEnd = this.sentenceIterator.current();
            int wordsInSentenceCount = countWords(sentenceStart, sentenceEnd);

            if (chunkWordCount + wordsInSentenceCount > maxNumberWordsPerChunk) {
                // Adding this sentence would overflow the chunk: close the chunk at the
                // end of the previous sentence and start a new one.
                int nextChunkWordCount = wordsInSentenceCount;

                if (chunkWordCount > 0) {
                    chunks.add(new Chunker.ChunkOffset(chunkStart, chunkEnd));

                    if (includePrecedingSentence) {
                        if (wordsInPrecedingSentenceCount + wordsInSentenceCount > maxNumberWordsPerChunk) {
                            // The whole preceding sentence does not fit alongside this one,
                            // so only its trailing words are carried over as overlap.
                            int numWordsToSkip = numWordsToSkipInPreviousSentence(
                                wordsInPrecedingSentenceCount,
                                maxNumberWordsPerChunk
                            );
                            chunkStart = skipWords(input, previousSentenceStart, numWordsToSkip);
                            chunkWordCount = wordsInPrecedingSentenceCount - numWordsToSkip + wordsInSentenceCount;
                        } else {
                            chunkWordCount = wordsInPrecedingSentenceCount + wordsInSentenceCount;
                            chunkStart = previousSentenceStart;
                        }
                        nextChunkWordCount = chunkWordCount;
                    } else {
                        chunkStart = chunkEnd;
                        chunkWordCount = wordsInSentenceCount;
                    }
                }

                if (nextChunkWordCount > maxNumberWordsPerChunk) {
                    // The new chunk is still too large, so split it on word boundaries.
                    List<WordBoundaryChunker.ChunkPosition> sentenceSplits = splitLongSentence(
                        input.substring(chunkStart, sentenceEnd),
                        maxNumberWordsPerChunk,
                        overlapForChunkSize(maxNumberWordsPerChunk)
                    );

                    // Split positions are relative to the substring, so shift them by
                    // chunkStart. All splits but the last are emitted as chunks.
                    int lastIndex = sentenceSplits.size() - 1;
                    for (int i = 0; i < lastIndex; i++) {
                        WordBoundaryChunker.ChunkPosition split = sentenceSplits.get(i);
                        chunks.add(
                            new Chunker.ChunkOffset(chunkStart + split.offsets().start(), chunkStart + split.offsets().end())
                        );
                    }

                    // The last split becomes the start of the chunk currently being built.
                    WordBoundaryChunker.ChunkPosition finalSplit = sentenceSplits.get(lastIndex);
                    chunkStart += finalSplit.offsets().start();
                    chunkWordCount = finalSplit.wordCount();
                }
            } else {
                chunkWordCount += wordsInSentenceCount;
            }

            if (includePrecedingSentence) {
                previousSentenceStart = sentenceStart;
                wordsInPrecedingSentenceCount = wordsInSentenceCount;
            }
            sentenceStart = sentenceEnd;
            chunkEnd = sentenceEnd;

            boundary = this.sentenceIterator.next();
        }

        if (chunkWordCount > 0) {
            // Emit whatever is left in the chunk being built, up to the end of the input.
            chunks.add(new Chunker.ChunkOffset(chunkStart, input.length()));
        }

        if (chunks.isEmpty()) {
            // Nothing was chunked: return the entire input as one chunk.
            chunks.add(new Chunker.ChunkOffset(0, input.length()));
        }

        return chunks;
    }

    /**
     * Splits {@code text} on word boundaries by delegating to a new
     * {@link WordBoundaryChunker}.
     *
     * @param text             the text to split
     * @param maxNumberOfWords passed to the word chunker as its chunk size
     * @param overlap          passed to the word chunker as its overlap
     * @return the positions produced by the word chunker, relative to {@code text}
     */
    static List<WordBoundaryChunker.ChunkPosition> splitLongSentence(String text, int maxNumberOfWords, int overlap) {
        return new WordBoundaryChunker().chunkPositions(text, maxNumberOfWords, overlap);
    }

    /**
     * Computes how many leading words of the preceding sentence to drop so that the
     * remaining words do not exceed {@link #maxWordsInOverlap(int)}.
     *
     * @param wordsInPrecedingSentenceCount number of words in the preceding sentence
     * @param maxNumberWordsPerChunk        the maximum number of words in a chunk
     * @return the number of words to skip, or 0 if the sentence already fits in the overlap
     */
    static int numWordsToSkipInPreviousSentence(int wordsInPrecedingSentenceCount, int maxNumberWordsPerChunk) {
        int maxWordsInOverlap = maxWordsInOverlap(maxNumberWordsPerChunk);
        if (wordsInPrecedingSentenceCount > maxWordsInOverlap) {
            return wordsInPrecedingSentenceCount - maxWordsInOverlap;
        }
        return 0;
    }

    /**
     * Returns the maximum number of words taken from the preceding sentence as overlap:
     * half of {@code maxNumberWordsPerChunk}, capped at 20.
     */
    static int maxWordsInOverlap(int maxNumberWordsPerChunk) {
        return Math.min(maxNumberWordsPerChunk / 2, MAX_OVERLAP_WORDS);
    }

    /**
     * Skips {@code numWords} words in {@code input} starting from {@code start}, using a
     * fresh word iterator so the shared instance iterator is left untouched.
     *
     * @return the offset reached after skipping
     */
    private int skipWords(String input, int start, int numWords) {
        BreakIterator itr = BreakIterator.getWordInstance(Locale.ROOT);
        itr.setText(input);
        return skipWords(start, numWords, itr);
    }

    /**
     * Positions {@code wordIterator} at the boundary preceding {@code start} and then
     * advances it boundary by boundary until {@code numWords} words have been counted.
     * A boundary counts as a word when its rule status is not
     * {@link BreakIterator#WORD_NONE}.
     *
     * @param start        offset to start skipping from
     * @param numWords     number of words to skip
     * @param wordIterator a word iterator already set to the text
     * @return the boundary reached, or the last boundary of the text if the iterator is
     *         exhausted first
     */
    static int skipWords(int start, int numWords, BreakIterator wordIterator) {
        wordIterator.preceding(start);

        int boundary = wordIterator.current();
        int wordCount = 0;
        while (boundary != BreakIterator.DONE && wordCount < numWords) {
            int wordStatus = wordIterator.getRuleStatus();
            if (wordStatus != BreakIterator.WORD_NONE) {
                ++wordCount;
            }
            boundary = wordIterator.next();
        }

        if (boundary == BreakIterator.DONE) {
            return wordIterator.last();
        }
        return boundary;
    }

    /** Counts the words between {@code start} and {@code end} using the shared word iterator. */
    private int countWords(int start, int end) {
        return ChunkerUtils.countWords(start, end, this.wordIterator);
    }

    /**
     * Returns the overlap passed to the word-boundary splitter for an over-long
     * sentence: {@code (chunkSize - 1) / 2}, capped at 20.
     * NOTE(review): the {@code - 1} presumably keeps the overlap within what
     * WordBoundaryChunker accepts for the chunk size; confirm against that class.
     */
    private static int overlapForChunkSize(int chunkSize) {
        return Math.min(MAX_OVERLAP_WORDS, (chunkSize - 1) / 2);
    }
}

