/*
 * Decompiled with CFR 0.152.
 */
package org.elasticsearch.xpack.core.ml.inference.preprocessing.customwordembedding;

import java.util.Map;
import java.util.TreeMap;
import org.apache.lucene.util.Counter;
import org.elasticsearch.xpack.core.ml.inference.preprocessing.customwordembedding.ContinuousFeatureValue;
import org.elasticsearch.xpack.core.ml.inference.preprocessing.customwordembedding.FeatureExtractor;
import org.elasticsearch.xpack.core.ml.inference.preprocessing.customwordembedding.FeatureValue;
import org.elasticsearch.xpack.core.ml.inference.preprocessing.customwordembedding.Hash32;

/**
 * Feature extractor that turns a text into weighted character n-gram features.
 *
 * <p>Each space-separated token of the input is wrapped in a leading {@code ^} and a
 * trailing {@code $}. Every run of {@code nGrams} consecutive characters that does not
 * contain a space is counted, and each distinct n-gram becomes one
 * {@link ContinuousFeatureValue} whose id is the n-gram's hash modulo {@code dimensionId}
 * and whose weight is the n-gram's share of all counted n-grams.
 */
public class NGramFeatureExtractor implements FeatureExtractor {
    private static final Hash32 hashing = new Hash32();
    private final int nGrams;
    private final int dimensionId;

    /**
     * @param nGrams      number of characters in each extracted n-gram
     * @param dimensionId modulus applied to an n-gram's hash to obtain its feature id
     */
    public NGramFeatureExtractor(int nGrams, int dimensionId) {
        this.nGrams = nGrams;
        this.dimensionId = dimensionId;
    }

    /**
     * Extracts one feature per distinct character n-gram found in {@code text}.
     *
     * @param text the text to extract n-grams from; a single space character is the
     *             only token separator recognised
     * @return one feature value per distinct n-gram, ordered by the natural
     *         {@link String} ordering of the n-grams; empty when no n-gram was counted
     */
    @Override
    public FeatureValue[] extractFeatures(String text) {
        String textWithTerminators = addTerminators(text);

        // A sorted map keeps the order of the returned features deterministic.
        Map<String, Counter> charNGrams = new TreeMap<>();
        int countSum = 0;
        int end = textWithTerminators.length() - this.nGrams;
        for (int start = 0; start <= end; ++start) {
            StringBuilder charNGram = new StringBuilder();
            int length;
            for (length = 0; length < this.nGrams; ++length) {
                char currentChar = textWithTerminators.charAt(start + length);
                if (currentChar == ' ') {
                    // The window would straddle a token boundary; abandon it.
                    break;
                }
                charNGram.append(currentChar);
            }
            if (length != this.nGrams) {
                continue;
            }
            charNGrams.computeIfAbsent(charNGram.toString(), k -> Counter.newCounter()).addAndGet(1L);
            ++countSum;
        }

        // countSum is only zero when the map is empty, so the division below is never reached with a zero divisor.
        FeatureValue[] results = new FeatureValue[charNGrams.size()];
        int resultIndex = 0;
        for (Map.Entry<String, Counter> entry : charNGrams.entrySet()) {
            String nGram = entry.getKey();
            long occurrences = entry.getValue().get();
            double weight = (double) occurrences / (double) countSum;
            int id = (int) (hashing.hash(nGram) % (long) this.dimensionId);
            results[resultIndex++] = new ContinuousFeatureValue(id, weight);
        }
        return results;
    }

    /**
     * Prefixes the text with {@code ^}, suffixes it with {@code $}, and replaces every
     * space with {@code "$ ^"} so that each space-separated token carries its own
     * start and end markers (e.g. {@code "a b"} becomes {@code "^a$ ^b$"}).
     */
    private static String addTerminators(String text) {
        StringBuilder withTerminators = new StringBuilder("^");
        for (int i = 0; i < text.length(); ++i) {
            char c = text.charAt(i);
            if (c == ' ') {
                withTerminators.append("$ ^");
            } else {
                withTerminators.append(c);
            }
        }
        withTerminators.append("$");
        return withTerminators.toString();
    }
}

