/*
 * Decompiled with CFR 0.152.
 */
package org.elasticsearch.xpack.inference.chunking;

import com.ibm.icu.text.BreakIterator;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

/**
 * Splits text into chunks that contain a fixed number of words, optionally
 * repeating words from the end of one chunk at the start of the next.
 *
 * <p>Word boundaries are found with an ICU word {@link BreakIterator} created
 * for {@link Locale#ROOT}. Chunks are contiguous slices of the input: the
 * text between counted words (whatever the iterator reports with rule status
 * {@code 0}) is carried along inside the chunk it falls in rather than being
 * stripped.
 *
 * <p>Each call resets the text of the single iterator held by this instance,
 * so concurrent calls on the same instance would interfere with each other.
 * Use one instance per thread or synchronize externally.
 */
public class WordBoundaryChunker {
    /** Reused across calls; {@link #chunkPositions} resets its text on every invocation. */
    private final BreakIterator wordIterator = BreakIterator.getWordInstance(Locale.ROOT);

    /**
     * Breaks {@code input} into chunks of {@code chunkSize} words.
     *
     * <p>An empty input short-circuits to a single empty chunk before the
     * chunking parameters are validated.
     *
     * @param input     the text to split; must not be {@code null}
     * @param chunkSize the number of words per chunk
     * @param overlap   the number of words shared between consecutive chunks
     * @return the chunk texts in input order; a list holding one empty string
     *         when {@code input} is empty
     * @throws IllegalArgumentException if {@code input} is non-empty and the
     *         parameters are rejected by {@link #chunkPositions}
     */
    public List<String> chunk(String input, int chunkSize, int overlap) {
        if (input.isEmpty()) {
            return List.of("");
        }
        List<ChunkPosition> chunkPositions = this.chunkPositions(input, chunkSize, overlap);
        List<String> chunks = new ArrayList<>(chunkPositions.size());
        for (ChunkPosition pos : chunkPositions) {
            chunks.add(input.substring(pos.start(), pos.end()));
        }
        return chunks;
    }

    /**
     * Computes the character offsets of every chunk without materialising the
     * chunk text.
     *
     * <p>A chunk is closed at the boundary that follows its
     * {@code chunkSize}-th word. The last chunk instead runs to the end of
     * the input, so trailing non-word text is kept. A trailing chunk is only
     * emitted if it contains at least one word beyond the carried-over
     * overlap, or if no chunk has been produced at all.
     *
     * <p>Note that the overlap check accepts {@code overlap == chunkSize / 2}
     * even though the exception message reads {@code "< chunk size / 2"}; the
     * message text is kept as is because callers may match on it.
     *
     * @param input     the text to split; must not be {@code null}
     * @param chunkSize the number of words per chunk; must be positive
     * @param overlap   the number of words shared between consecutive chunks;
     *                  must be non-negative and at most {@code chunkSize / 2}
     * @return the chunk positions in input order; empty when {@code input} is empty
     * @throws IllegalArgumentException if {@code overlap} is negative or
     *         greater than {@code chunkSize / 2}, or if {@code chunkSize} is
     *         not positive
     */
    List<ChunkPosition> chunkPositions(String input, int chunkSize, int overlap) {
        if (overlap > 0 && overlap > chunkSize / 2) {
            throw new IllegalArgumentException("Invalid chunking parameters, overlap [" + overlap + "] must be < chunk size / 2 [" + chunkSize + " / 2 = " + chunkSize / 2 + "]");
        }
        if (overlap < 0) {
            throw new IllegalArgumentException("Invalid chunking parameters, overlap [" + overlap + "] must be >= 0");
        }
        // Checked after the overlap rules so that every previously rejected
        // combination still fails with its original message. Without this
        // guard a non-positive chunk size would close a chunk after every word.
        if (chunkSize <= 0) {
            throw new IllegalArgumentException("Invalid chunking parameters, chunk size [" + chunkSize + "] must be > 0");
        }
        if (input.isEmpty()) {
            return List.of();
        }

        List<ChunkPosition> chunkPositions = new ArrayList<>();
        // Number of new (non-overlapping) words each chunk contributes.
        int chunkSizeLessOverlap = chunkSize - overlap;
        // Words in the chunk being built, counting those carried over as overlap.
        int wordsInChunkCountIncludingOverlap = 0;
        // Offset at which the chunk after the current one will begin.
        int nextWindowStart = 0;
        // Offset at which the chunk being built begins.
        int windowStart = 0;
        // Words seen since nextWindowStart was last moved.
        int wordsSinceStartWindowWasMarked = 0;

        this.wordIterator.setText(input);
        int boundary = this.wordIterator.next();
        // -1 signals that the iterator is exhausted (presumably ICU's BreakIterator.DONE).
        while (boundary != -1) {
            // Rule status 0 marks a segment that is not a word (presumably ICU's
            // BreakIterator.WORD_NONE, i.e. whitespace/punctuation); only real words count.
            if (this.wordIterator.getRuleStatus() != 0) {
                ++wordsSinceStartWindowWasMarked;
                ++wordsInChunkCountIncludingOverlap;
                if (wordsInChunkCountIncludingOverlap >= chunkSize) {
                    chunkPositions.add(new ChunkPosition(windowStart, boundary, wordsInChunkCountIncludingOverlap));
                    // The next chunk starts out already holding the overlap words.
                    wordsInChunkCountIncludingOverlap = overlap;
                    if (overlap == 0) {
                        // Without overlap the next chunk begins exactly where this one ended.
                        nextWindowStart = boundary;
                    }
                    windowStart = nextWindowStart;
                }
                // Must run after the chunk is closed: when overlap == chunkSize / 2 both
                // conditions fire on the same word and the closed chunk needs the old mark.
                if (wordsSinceStartWindowWasMarked == chunkSizeLessOverlap) {
                    nextWindowStart = boundary;
                    wordsSinceStartWindowWasMarked = 0;
                }
            }
            boundary = this.wordIterator.next();
        }

        // Flush the remainder if it holds more than just carried-over overlap words,
        // and always emit something for a non-empty input.
        if (wordsInChunkCountIncludingOverlap > overlap || chunkPositions.isEmpty()) {
            chunkPositions.add(new ChunkPosition(windowStart, input.length(), wordsInChunkCountIncludingOverlap));
        }
        return chunkPositions;
    }

    /**
     * Location of one chunk inside the input string.
     *
     * @param start     offset of the first character of the chunk (inclusive)
     * @param end       offset just past the last character of the chunk (exclusive)
     * @param wordCount number of words counted in the chunk, including overlap words
     */
    record ChunkPosition(int start, int end, int wordCount) {
    }
}

