/*
 * Decompiled with CFR 0.152.
 */
package org.elasticsearch.xpack.ml.aggs.categorization;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.Map;
import java.util.Optional;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.common.util.BytesRefHash;
import org.elasticsearch.common.util.ObjectArray;
import org.elasticsearch.core.Releasable;
import org.elasticsearch.core.Releasables;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import org.elasticsearch.search.aggregations.CardinalityUpperBound;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.LeafBucketCollector;
import org.elasticsearch.search.aggregations.LeafBucketCollectorBase;
import org.elasticsearch.search.aggregations.bucket.DeferableBucketAggregator;
import org.elasticsearch.search.aggregations.bucket.terms.LongKeyedBucketOrds;
import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregator;
import org.elasticsearch.search.aggregations.support.AggregationContext;
import org.elasticsearch.search.lookup.SourceLookup;
import org.elasticsearch.xpack.core.ml.job.config.CategorizationAnalyzerConfig;
import org.elasticsearch.xpack.ml.aggs.categorization.CategorizationBytesRefHash;
import org.elasticsearch.xpack.ml.aggs.categorization.CategorizationTokenTree;
import org.elasticsearch.xpack.ml.aggs.categorization.InternalCategorizationAggregation;
import org.elasticsearch.xpack.ml.aggs.categorization.TextCategorization;
import org.elasticsearch.xpack.ml.job.categorization.CategorizationAnalyzer;

/**
 * Bucket aggregator that groups the text values of a {@code _source} field into categories.
 *
 * <p>For every owning bucket ordinal a separate {@link CategorizationTokenTree} is kept. Each
 * raw value read from the document source is analyzed into tokens, the tokens are interned in a
 * shared {@link CategorizationBytesRefHash}, and the resulting token-id array is handed to the
 * tree, which returns the {@link TextCategorization} the value belongs to. The id of that
 * categorization is used as the key into {@link LongKeyedBucketOrds} so that sub-aggregations
 * are collected per category.
 */
public class CategorizeTextAggregator
extends DeferableBucketAggregator {
    /**
     * Upper bound on the number of non-empty tokens taken from a single value.
     * NOTE(review): the literal 100 presumably mirrors a limit declared on the aggregation
     * builder; confirm and reference that constant if it is accessible from here.
     */
    private static final int MAX_TOKENS_PER_VALUE = 100;

    private final TermsAggregator.BucketCountThresholds bucketCountThresholds;
    private final SourceLookup sourceLookup;
    private final MappedFieldType fieldType;
    private final CategorizationAnalyzer analyzer;
    private final String sourceFieldName;
    /** One token tree per owning bucket ordinal; grown on demand while collecting. */
    private ObjectArray<CategorizationTokenTree> categorizers;
    private final int maxUniqueTokens;
    private final int maxMatchTokens;
    private final int similarityThreshold;
    private final LongKeyedBucketOrds bucketOrds;
    private final CategorizationBytesRefHash bytesRefHash;

    /**
     * Creates the aggregator and the analyzer used to tokenize source values.
     *
     * @param name                         aggregation name
     * @param factories                    sub-aggregation factories
     * @param context                      aggregation context; supplies the source lookup, analyzers and big arrays
     * @param parent                       parent aggregator
     * @param sourceFieldName              name of the field whose raw values are read from the document source
     * @param fieldType                    mapped field type, used for its name and to render {@link BytesRef} values
     * @param bucketCountThresholds        size and minimum doc count thresholds
     * @param maxUniqueTokens              forwarded to every {@link CategorizationTokenTree} and to the result
     * @param maxMatchTokens               forwarded to every {@link CategorizationTokenTree} and to the result
     * @param similarityThreshold          forwarded to every {@link CategorizationTokenTree} and to the result
     * @param categorizationAnalyzerConfig analyzer configuration; when {@code null} the standard
     *                                     categorization analyzer with no extra filters is used
     * @param metadata                     aggregation metadata
     * @throws IOException              if the aggregator or the custom analyzer cannot be built
     * @throws IllegalArgumentException if the configuration names a global analyzer that does not exist
     */
    protected CategorizeTextAggregator(String name, AggregatorFactories factories, AggregationContext context, Aggregator parent, String sourceFieldName, MappedFieldType fieldType, TermsAggregator.BucketCountThresholds bucketCountThresholds, int maxUniqueTokens, int maxMatchTokens, int similarityThreshold, CategorizationAnalyzerConfig categorizationAnalyzerConfig, Map<String, Object> metadata) throws IOException {
        super(name, factories, context, parent, metadata);
        this.sourceLookup = context.lookup().source();
        this.sourceFieldName = sourceFieldName;
        this.fieldType = fieldType;
        // Only build the default configuration when the caller did not supply one.
        CategorizationAnalyzerConfig analyzerConfig = categorizationAnalyzerConfig != null
            ? categorizationAnalyzerConfig
            : CategorizationAnalyzerConfig.buildStandardCategorizationAnalyzer(Collections.emptyList());
        String analyzerName = analyzerConfig.getAnalyzer();
        if (analyzerName != null) {
            Analyzer globalAnalyzer = context.getNamedAnalyzer(analyzerName);
            if (globalAnalyzer == null) {
                throw new IllegalArgumentException("Failed to find global analyzer [" + analyzerName + "]");
            }
            this.analyzer = new CategorizationAnalyzer(globalAnalyzer, false);
        } else {
            this.analyzer = new CategorizationAnalyzer(
                context.buildCustomAnalyzer(
                    context.getIndexSettings(),
                    false,
                    analyzerConfig.getTokenizer(),
                    analyzerConfig.getCharFilters(),
                    analyzerConfig.getTokenFilters()
                ),
                true
            );
        }
        this.categorizers = bigArrays().newObjectArray(1L);
        this.maxUniqueTokens = maxUniqueTokens;
        this.maxMatchTokens = maxMatchTokens;
        this.similarityThreshold = similarityThreshold;
        this.bucketOrds = LongKeyedBucketOrds.build(bigArrays(), CardinalityUpperBound.MANY);
        this.bucketCountThresholds = bucketCountThresholds;
        this.bytesRefHash = new CategorizationBytesRefHash(new BytesRefHash(2048L, bigArrays()));
    }

    /** Releases the analyzer, the token hash, the bucket ordinals and the per-ordinal tree array. */
    @Override
    protected void doClose() {
        super.doClose();
        Releasables.close(this.analyzer, this.bytesRefHash, this.bucketOrds, this.categorizers);
    }

    /**
     * Builds one {@link InternalCategorizationAggregation} per requested owning bucket ordinal.
     *
     * <p>For each ordinal the intermediate buckets of its token tree are filtered by the shard
     * minimum doc count, the top ones are kept through a bounded priority queue, sub-aggregations
     * are attached, and the buckets are sorted in their natural order.
     *
     * @param ordsToCollect owning bucket ordinals to build results for
     * @return results in the same order as {@code ordsToCollect}
     * @throws IOException if building the sub-aggregations fails
     */
    @Override
    public InternalAggregation[] buildAggregations(long[] ordsToCollect) throws IOException {
        InternalCategorizationAggregation.Bucket[][] topBucketsPerOrd = new InternalCategorizationAggregation.Bucket[ordsToCollect.length][];
        for (int ordIdx = 0; ordIdx < ordsToCollect.length; ++ordIdx) {
            long owningBucketOrd = ordsToCollect[ordIdx];
            // The array only grows while collecting, so an ordinal that never saw a document
            // may lie beyond its current size; treat that the same as "no tree".
            CategorizationTokenTree categorizationTokenTree = owningBucketOrd < this.categorizers.size()
                ? this.categorizers.get(owningBucketOrd)
                : null;
            if (categorizationTokenTree == null) {
                topBucketsPerOrd[ordIdx] = new InternalCategorizationAggregation.Bucket[0];
                continue;
            }
            // Size the queue from the owning ordinal itself, not from its position in ordsToCollect.
            int size = (int) Math.min(this.bucketOrds.bucketsInOrd(owningBucketOrd), (long) this.bucketCountThresholds.getShardSize());
            InternalCategorizationAggregation.BucketCountPriorityQueue ordered = new InternalCategorizationAggregation.BucketCountPriorityQueue(size);
            for (InternalCategorizationAggregation.Bucket bucket : categorizationTokenTree.toIntermediateBuckets(this.bytesRefHash)) {
                if (bucket.docCount < this.bucketCountThresholds.getShardMinDocCount()) {
                    continue;
                }
                ordered.insertWithOverflow(bucket);
            }
            topBucketsPerOrd[ordIdx] = new InternalCategorizationAggregation.Bucket[ordered.size()];
            // Fill from the back so the array ends up in the reverse of the queue's pop order.
            for (int i = ordered.size() - 1; i >= 0; --i) {
                topBucketsPerOrd[ordIdx][i] = ordered.pop();
            }
        }
        this.buildSubAggsForAllBuckets(topBucketsPerOrd, b -> b.bucketOrd, (b, a) -> {
            b.aggregations = a;
        });
        InternalAggregation[] results = new InternalAggregation[ordsToCollect.length];
        for (int ordIdx = 0; ordIdx < ordsToCollect.length; ++ordIdx) {
            InternalCategorizationAggregation.Bucket[] bucketArray = topBucketsPerOrd[ordIdx];
            Arrays.sort(bucketArray, Comparator.naturalOrder());
            results[ordIdx] = new InternalCategorizationAggregation(
                this.name,
                this.bucketCountThresholds.getRequiredSize(),
                this.bucketCountThresholds.getMinDocCount(),
                this.maxUniqueTokens,
                this.maxMatchTokens,
                this.similarityThreshold,
                this.metadata(),
                Arrays.asList(bucketArray)
            );
        }
        return results;
    }

    /** Returns a result carrying this aggregator's settings and no buckets. */
    @Override
    public InternalAggregation buildEmptyAggregation() {
        return new InternalCategorizationAggregation(
            this.name,
            this.bucketCountThresholds.getRequiredSize(),
            this.bucketCountThresholds.getMinDocCount(),
            this.maxUniqueTokens,
            this.maxMatchTokens,
            this.similarityThreshold,
            this.metadata()
        );
    }

    /**
     * Returns a collector that categorizes every raw source value of each collected document.
     *
     * @param ctx segment being collected
     * @param sub collector for the sub-aggregations
     * @return the leaf collector for {@code ctx}
     * @throws IOException declared by the overridden method
     */
    @Override
    protected LeafBucketCollector getLeafCollector(final LeafReaderContext ctx, final LeafBucketCollector sub) throws IOException {
        return new LeafBucketCollectorBase(sub, null) {

            @Override
            public void collect(int doc, long owningBucketOrd) throws IOException {
                categorizers = bigArrays().grow(categorizers, owningBucketOrd + 1L);
                CategorizationTokenTree categorizer = categorizers.get(owningBucketOrd);
                if (categorizer == null) {
                    categorizer = new CategorizationTokenTree(maxUniqueTokens, maxMatchTokens, similarityThreshold);
                    // Account for the freshly created tree against the request circuit breaker.
                    addRequestCircuitBreakerBytes(categorizer.ramBytesUsed());
                    categorizers.set(owningBucketOrd, categorizer);
                }
                collectFromSource(doc, owningBucketOrd, categorizer);
            }

            /** Reads the raw values of the source field for {@code doc} and categorizes each one. */
            private void collectFromSource(int doc, long owningBucketOrd, CategorizationTokenTree categorizer) throws IOException {
                sourceLookup.setSegmentAndDocument(ctx, doc);
                for (Object rawValue : sourceLookup.extractRawValuesWithoutCaching(sourceFieldName)) {
                    if (rawValue == null) {
                        // A null value has no text to tokenize; skip it instead of handing null to the analyzer.
                        continue;
                    }
                    String text = rawValue instanceof BytesRef
                        ? fieldType.valueForDisplay(rawValue).toString()
                        : rawValue.toString();
                    TokenStream ts = analyzer.tokenStream(fieldType.name(), text);
                    processTokenStream(owningBucketOrd, ts, doc, categorizer);
                }
            }

            /**
             * Consumes and closes {@code ts}, feeds the resulting token ids to {@code categorizer}
             * and collects {@code doc} into the bucket of the categorization it was assigned to.
             * Values that produce no non-empty token are ignored.
             */
            private void processTokenStream(long owningBucketOrd, TokenStream ts, int doc, CategorizationTokenTree categorizer) throws IOException {
                ArrayList<Integer> tokens = new ArrayList<>();
                try (TokenStream stream = ts) {
                    CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
                    stream.reset();
                    // Check the cap first so the stream is not advanced once the limit is reached.
                    while (tokens.size() < MAX_TOKENS_PER_VALUE && stream.incrementToken()) {
                        if (termAtt.length() > 0) {
                            tokens.add(bytesRefHash.put(new BytesRef(termAtt)));
                        }
                    }
                }
                if (tokens.isEmpty()) {
                    return;
                }
                long previousSize = categorizer.ramBytesUsed();
                TextCategorization lg = categorizer.parseTokens(
                    tokens.stream().mapToInt(Integer::intValue).toArray(),
                    docCountProvider.getDocCount(doc)
                );
                // Only growth of the tree is reported to the circuit breaker.
                long addedBytes = categorizer.ramBytesUsed() - previousSize;
                if (addedBytes > 0L) {
                    addRequestCircuitBreakerBytes(addedBytes);
                }
                long bucketOrd = bucketOrds.add(owningBucketOrd, lg.getId());
                if (bucketOrd < 0L) {
                    // A negative result encodes an already existing ordinal as (-1 - ord).
                    bucketOrd = -1L - bucketOrd;
                    collectExistingBucket(sub, doc, bucketOrd);
                } else {
                    lg.bucketOrd = bucketOrd;
                    collectBucket(sub, doc, bucketOrd);
                }
            }
        };
    }
}

