/*
 * Decompiled with CFR 0.152.
 */
package org.apache.lucene.analysis.ko;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.EnumMap;
import java.util.List;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.ko.DecompoundToken;
import org.apache.lucene.analysis.ko.DictionaryToken;
import org.apache.lucene.analysis.ko.GraphvizFormatter;
import org.apache.lucene.analysis.ko.POS;
import org.apache.lucene.analysis.ko.Token;
import org.apache.lucene.analysis.ko.dict.CharacterDefinition;
import org.apache.lucene.analysis.ko.dict.ConnectionCosts;
import org.apache.lucene.analysis.ko.dict.Dictionary;
import org.apache.lucene.analysis.ko.dict.TokenInfoDictionary;
import org.apache.lucene.analysis.ko.dict.TokenInfoFST;
import org.apache.lucene.analysis.ko.dict.UnknownDictionary;
import org.apache.lucene.analysis.ko.dict.UserDictionary;
import org.apache.lucene.analysis.ko.tokenattributes.PartOfSpeechAttribute;
import org.apache.lucene.analysis.ko.tokenattributes.ReadingAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.analysis.util.RollingCharBuffer;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.fst.FST;

public final class KoreanTokenizer
extends Tokenizer {
    /** Decompound mode used by the no-argument constructor. */
    public static final DecompoundMode DEFAULT_DECOMPOUND = DecompoundMode.DISCARD;
    // Debug switch; not read anywhere in the visible code.
    private static final boolean VERBOSE = false;
    // Limits for unknown-word length and for the distance between two backtraces.
    // NOTE(review): parse() spells both limits as the literal 1024 instead of reading these.
    private static final int MAX_UNKNOWN_WORD_LENGTH = 1024;
    private static final int MAX_BACKTRACE_GAP = 1024;

    // Maps each token Type to the dictionary that produced it (filled in the constructor).
    private final EnumMap<Type, Dictionary> dictionaryMap = new EnumMap<>(Type.class);

    // System dictionary, unknown-word dictionary, connection costs and optional user dictionary.
    private final TokenInfoFST fst;
    private final TokenInfoDictionary dictionary;
    private final UnknownDictionary unkDictionary;
    private final ConnectionCosts costs;
    private final UserDictionary userDictionary;
    private final CharacterDefinition characterDefinition;

    // Scratch objects reused across FST lookups in parse().
    private final FST.Arc<Long> arc = new FST.Arc<>();
    private final FST.BytesReader fstReader;
    private final IntsRef wordIdRef = new IntsRef();

    // Both are null when no user dictionary was supplied.
    private final FST.BytesReader userFSTReader;
    private final TokenInfoFST userFST;

    private final DecompoundMode mode;
    private final boolean outputUnknownUnigrams;

    // Input characters and lattice positions; both are trimmed after every backtrace.
    private final RollingCharBuffer buffer = new RollingCharBuffer();
    private final WrappedPositionArray positions = new WrappedPositionArray();

    // True once parse() has reached the end of the input.
    private boolean end;
    // Position where the previous backtrace ended.
    private int lastBackTracePos;
    // Position parse() will process next.
    private int pos;

    // Tokens produced by backtrace(); incrementToken() consumes them from the end of the list.
    private final List<Token> pending = new ArrayList<>();

    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
    private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
    private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
    private final PositionLengthAttribute posLengthAtt = addAttribute(PositionLengthAttribute.class);
    private final PartOfSpeechAttribute posAtt = addAttribute(PartOfSpeechAttribute.class);
    private final ReadingAttribute readingAtt = addAttribute(ReadingAttribute.class);

    // Optional debugging hook, notified at the start of every backtrace; may be null.
    private GraphvizFormatter dotOut;

    /**
     * Creates a tokenizer with the default attribute factory, no user dictionary,
     * {@link #DEFAULT_DECOMPOUND} as the decompound mode, and unknown-word unigram output disabled.
     */
    public KoreanTokenizer() {
        this(DEFAULT_TOKEN_ATTRIBUTE_FACTORY, null, DEFAULT_DECOMPOUND, false);
    }

    public KoreanTokenizer(AttributeFactory factory, UserDictionary userDictionary, DecompoundMode mode, boolean outputUnknownUnigrams) {
        super(factory);
        this.mode = mode;
        this.outputUnknownUnigrams = outputUnknownUnigrams;
        this.dictionary = TokenInfoDictionary.getInstance();
        this.fst = this.dictionary.getFST();
        this.unkDictionary = UnknownDictionary.getInstance();
        this.characterDefinition = this.unkDictionary.getCharacterDefinition();
        this.userDictionary = userDictionary;
        this.costs = ConnectionCosts.getInstance();
        this.fstReader = this.fst.getBytesReader();
        if (userDictionary != null) {
            this.userFST = userDictionary.getFST();
            this.userFSTReader = this.userFST.getBytesReader();
        } else {
            this.userFST = null;
            this.userFSTReader = null;
        }
        this.buffer.reset(this.input);
        this.resetState();
        this.dictionaryMap.put(Type.KNOWN, this.dictionary);
        this.dictionaryMap.put(Type.UNKNOWN, this.unkDictionary);
        this.dictionaryMap.put(Type.USER, userDictionary);
    }

    /**
     * Installs a formatter that is notified through {@code onBacktrace} at the start of
     * every backtrace. Passing {@code null} turns the notification off.
     *
     * @param dotOut the formatter to notify, or {@code null}
     */
    public void setGraphvizFormatter(GraphvizFormatter dotOut) {
        this.dotOut = dotOut;
    }

    /**
     * Closes the tokenizer and re-points the rolling character buffer at the
     * current value of {@code input}.
     *
     * @throws IOException if the superclass fails to close
     */
    public void close() throws IOException {
        super.close();
        buffer.reset(input);
    }

    /**
     * Prepares the tokenizer for a new input: resets the superclass, rebinds the
     * character buffer to {@code input} and clears all lattice state.
     *
     * @throws IOException if the superclass reset fails
     */
    public void reset() throws IOException {
        super.reset();
        buffer.reset(input);
        resetState();
    }

    /**
     * Clears the parsing state and seeds the lattice with a single zero-cost
     * start entry at position 0.
     */
    private void resetState() {
        pending.clear();
        end = false;
        pos = 0;
        lastBackTracePos = 0;

        positions.reset();
        // Start-of-input entry: no cost, no predecessor (all back pointers are -1).
        final Position start = positions.get(0);
        start.add(0, 0, -1, -1, -1, -1, Type.KNOWN);
    }

    /**
     * Finishes the stream and sets both offsets to the corrected value of the
     * current parse position.
     *
     * @throws IOException if the superclass call fails
     */
    public void end() throws IOException {
        super.end();
        final int lastOffset = correctOffset(pos);
        offsetAtt.setOffset(lastOffset, lastOffset);
    }

    /**
     * Returns the extra cost charged when a word with the given left POS tag is
     * preceded by whitespace.
     *
     * @param leftPOS   left part-of-speech tag of the candidate word
     * @param numSpaces number of characters skipped before the word
     * @return 3000 when {@code numSpaces} is positive and the tag is one of
     *         E, J, VCP, XSA, XSN or XSV; otherwise 0
     */
    private int computeSpacePenalty(POS.Tag leftPOS, int numSpaces) {
        if (numSpaces <= 0) {
            return 0;
        }
        switch (leftPOS) {
            case E:
            case J:
            case VCP:
            case XSA:
            case XSN:
            case XSV:
                return 3000;
            default:
                return 0;
        }
    }

    /**
     * Adds one lattice edge for the word {@code wordID} spanning {@code wordPos..endPos}.
     * The edge is attached to the cheapest of the paths arriving at {@code fromPosData}.
     *
     * @param dict        dictionary that owns {@code wordID}
     * @param fromPosData position the edge leaves from
     * @param wordPos     position of the first character of the word
     * @param endPos      position just past the word
     * @param wordID      word identifier inside {@code dict}
     * @param type        kind of dictionary the word came from
     * @throws IOException declared by the original signature
     */
    private void add(Dictionary dict, Position fromPosData, int wordPos, int endPos, int wordID, Type type) throws IOException {
        final POS.Tag leftPOS = dict.getLeftPOS(wordID);
        final int wordCost = dict.getWordCost(wordID);
        final int leftID = dict.getLeftId(wordID);
        assert fromPosData.count > 0;

        int bestCost = Integer.MAX_VALUE;
        int bestIndex = -1;
        for (int i = 0; i < fromPosData.count; i++) {
            // Characters between the previous word's end and this word's start.
            final int numSpaces = wordPos - fromPosData.pos;
            // Path cost so far + connection cost + space penalty; the word cost is added after the loop.
            final int candidate = fromPosData.costs[i]
                + this.costs.get(fromPosData.lastRightID[i], leftID)
                + this.computeSpacePenalty(leftPOS, numSpaces);
            if (candidate < bestCost) {
                bestCost = candidate;
                bestIndex = i;
            }
        }

        final Position target = this.positions.get(endPos);
        target.add(bestCost + wordCost, dict.getRightId(wordID), fromPosData.pos, wordPos, bestIndex, wordID, type);
    }

    /**
     * Advances to the next token and copies it into the attributes.
     *
     * @return {@code false} once the input is exhausted and no token is pending
     * @throws IOException if reading the input fails while parsing
     */
    public boolean incrementToken() throws IOException {
        // Parse until a backtrace has produced at least one token, or the input ends.
        while (pending.isEmpty()) {
            if (end) {
                return false;
            }
            parse();
        }

        // Tokens are taken from the end of the pending list.
        final int last = pending.size() - 1;
        final Token next = pending.remove(last);
        final int termLength = next.getLength();

        clearAttributes();
        assert termLength > 0;
        termAtt.copyBuffer(next.getSurfaceForm(), next.getOffset(), termLength);
        offsetAtt.setOffset(correctOffset(next.getStartOffset()), correctOffset(next.getEndOffset()));
        posAtt.setToken(next);
        readingAtt.setToken(next);
        posIncAtt.setPositionIncrement(next.getPositionIncrement());
        posLengthAtt.setPositionLength(next.getPositionLength());
        return true;
    }

    /**
     * Extends the lattice from the current position until a backtrace yields at
     * least one pending token or the input is exhausted. At end of input the
     * {@code end} flag is set and a final backtrace is run.
     *
     * @throws IOException if reading from the character buffer fails
     */
    private void parse() throws IOException {
        // Never reassigned in this method; kept for the guard further down.
        int unknownWordEndIndex = -1;

        // Visit one position (UTF-16 code unit) per iteration.
        while (true) {
            if (this.buffer.get(this.pos) == -1) {
                // End of input.
                break;
            }

            final Position posData = this.positions.get(this.pos);
            final boolean isFrontier = this.positions.getNextPos() == this.pos + 1;

            if (posData.count == 0) {
                // No path arrives here; nothing can start at this position.
                this.pos++;
                continue;
            }

            if (this.pos > this.lastBackTracePos && posData.count == 1 && isFrontier) {
                // Only one path is alive and nothing reaches past this position,
                // so the best path must go through it: emit what we have.
                this.backtrace(posData, 0);
                // Re-base the surviving path's cost.
                posData.costs[0] = 0;
                if (!this.pending.isEmpty()) {
                    return;
                }
                // Otherwise the backtrace produced no tokens; keep parsing.
            }

            // 1024 mirrors MAX_BACKTRACE_GAP: too much is buffered, so force a backtrace.
            if (this.pos - this.lastBackTracePos >= 1024) {
                int bestIndex = -1;
                int bestCost = Integer.MAX_VALUE;
                Position bestPosData = null;

                // First pass: find the cheapest live path at or after the current position.
                for (int scan = this.pos; scan < this.positions.getNextPos(); scan++) {
                    final Position candidate = this.positions.get(scan);
                    for (int i = 0; i < candidate.count; i++) {
                        final int cost = candidate.costs[i];
                        if (cost < bestCost) {
                            bestCost = cost;
                            bestIndex = i;
                            bestPosData = candidate;
                        }
                    }
                }
                assert bestIndex != -1;

                // Second pass: discard every path except the winner, which moves to slot 0.
                for (int scan = this.pos; scan < this.positions.getNextPos(); scan++) {
                    final Position candidate = this.positions.get(scan);
                    if (candidate != bestPosData) {
                        candidate.reset();
                    } else {
                        if (bestIndex != 0) {
                            candidate.costs[0] = candidate.costs[bestIndex];
                            candidate.lastRightID[0] = candidate.lastRightID[bestIndex];
                            candidate.backPos[0] = candidate.backPos[bestIndex];
                            candidate.backWordPos[0] = candidate.backWordPos[bestIndex];
                            candidate.backIndex[0] = candidate.backIndex[bestIndex];
                            candidate.backID[0] = candidate.backID[bestIndex];
                            candidate.backType[0] = candidate.backType[bestIndex];
                        }
                        candidate.count = 1;
                    }
                }

                this.backtrace(bestPosData, 0);
                // Re-base the surviving costs.
                Arrays.fill(bestPosData.costs, 0, bestPosData.count, 0);

                if (this.pos != bestPosData.pos) {
                    // The winning path ends at a later position; jump there.
                    assert this.pos < bestPosData.pos;
                    this.pos = bestPosData.pos;
                }
                if (!this.pending.isEmpty()) {
                    return;
                }
            }

            // Skip a run of space separators; the word candidates start after it.
            if (Character.getType(this.buffer.get(this.pos)) == Character.SPACE_SEPARATOR) {
                int nextChar = this.buffer.get(++this.pos);
                while (nextChar != -1 && Character.getType(nextChar) == Character.SPACE_SEPARATOR) {
                    this.pos++;
                    nextChar = this.buffer.get(this.pos);
                }
            }
            if (this.buffer.get(this.pos) == -1) {
                // Only whitespace was left: rewind to where this iteration started.
                this.pos = posData.pos;
            }

            boolean anyMatches = false;

            // User dictionary first.
            if (this.userFST != null) {
                this.userFST.getFirstArc(this.arc);
                int output = 0;
                for (int ahead = this.pos; ; ahead++) {
                    final int ch = this.buffer.get(ahead);
                    if (ch == -1) {
                        break;
                    }
                    if (this.userFST.findTargetArc(ch, this.arc, this.arc, ahead == this.pos, this.userFSTReader) == null) {
                        break;
                    }
                    output += this.arc.output.intValue();
                    if (this.arc.isFinal()) {
                        this.add(this.userDictionary, posData, this.pos, ahead + 1, output + this.arc.nextFinalOutput.intValue(), Type.USER);
                        anyMatches = true;
                    }
                }
            }

            // System dictionary, only when the user dictionary matched nothing.
            if (!anyMatches) {
                this.fst.getFirstArc(this.arc);
                int output = 0;
                for (int ahead = this.pos; ; ahead++) {
                    final int ch = this.buffer.get(ahead);
                    if (ch == -1) {
                        break;
                    }
                    if (this.fst.findTargetArc(ch, this.arc, this.arc, ahead == this.pos, this.fstReader) == null) {
                        break;
                    }
                    output += this.arc.output.intValue();
                    if (this.arc.isFinal()) {
                        this.dictionary.lookupWordIds(output + this.arc.nextFinalOutput.intValue(), this.wordIdRef);
                        for (int ofs = 0; ofs < this.wordIdRef.length; ofs++) {
                            this.add(this.dictionary, posData, this.pos, ahead + 1, this.wordIdRef.ints[this.wordIdRef.offset + ofs], Type.KNOWN);
                            anyMatches = true;
                        }
                    }
                }
            }

            if (unknownWordEndIndex > posData.pos) {
                this.pos++;
                continue;
            }

            final char firstCharacter = (char) this.buffer.get(this.pos);
            if (!anyMatches || this.characterDefinition.isInvoke(firstCharacter)) {
                // Unknown-word candidate starting at the current position.
                byte characterId = this.characterDefinition.getCharacterClass(firstCharacter);
                int unknownWordLength = 1;
                if (this.characterDefinition.isGroup(firstCharacter)) {
                    // Extend over following characters of a compatible script and
                    // the same punctuation status.
                    Character.UnicodeScript scriptCode = Character.UnicodeScript.of(firstCharacter);
                    final boolean isPunct = KoreanTokenizer.isPunctuation(firstCharacter);
                    // 1024 mirrors MAX_UNKNOWN_WORD_LENGTH.
                    for (int ahead = this.pos + 1; unknownWordLength < 1024; ahead++) {
                        final int next = this.buffer.get(ahead);
                        if (next == -1) {
                            break;
                        }
                        final char ch = (char) next;
                        final int chType = Character.getType(ch);
                        final Character.UnicodeScript sc = Character.UnicodeScript.of(next);
                        // Non-spacing marks are treated as belonging to the running script.
                        final boolean sameScript = KoreanTokenizer.isSameScript(scriptCode, sc)
                            || chType == Character.NON_SPACING_MARK;
                        if (!sameScript
                            || KoreanTokenizer.isPunctuation(ch, chType) != isPunct
                            || !this.characterDefinition.isGroup(ch)) {
                            break;
                        }
                        unknownWordLength++;
                        // Once a concrete script shows up after a Common/Inherited start, adopt it.
                        if (KoreanTokenizer.isCommonOrInherited(scriptCode) && !KoreanTokenizer.isCommonOrInherited(sc)) {
                            scriptCode = sc;
                            characterId = this.characterDefinition.getCharacterClass(ch);
                        }
                    }
                }

                this.unkDictionary.lookupWordIds(characterId, this.wordIdRef);
                for (int ofs = 0; ofs < this.wordIdRef.length; ofs++) {
                    this.add(this.unkDictionary, posData, this.pos, this.pos + unknownWordLength, this.wordIdRef.ints[this.wordIdRef.offset + ofs], Type.UNKNOWN);
                }
            }

            this.pos++;
        }

        this.end = true;

        if (this.pos > 0) {
            // Pick the cheapest path into the final position, including the
            // connection cost to right-id 0, and emit it.
            final Position endPosData = this.positions.get(this.pos);
            int bestCost = Integer.MAX_VALUE;
            int bestIndex = -1;
            for (int i = 0; i < endPosData.count; i++) {
                final int cost = endPosData.costs[i] + this.costs.get(endPosData.lastRightID[i], 0);
                if (cost < bestCost) {
                    bestCost = cost;
                    bestIndex = i;
                }
            }
            this.backtrace(endPosData, bestIndex);
        }
    }

    /**
     * Walks the best path backwards from {@code endPosData} to the previous
     * backtrace position and appends the resulting tokens to {@code pending}.
     * Tokens are appended last-to-first; {@code incrementToken} removes them from
     * the end of the list. Afterwards the character buffer and the position array
     * are trimmed up to the end position.
     *
     * @param endPosData position at which the path to emit ends
     * @param fromIDX    index of the path entry inside {@code endPosData} to start from
     */
    private void backtrace(Position endPosData, int fromIDX) {
        int endPos = endPosData.pos;
        // Characters covered by this backtrace: [lastBackTracePos, endPos).
        char[] fragment = this.buffer.get(this.lastBackTracePos, endPos - this.lastBackTracePos);
        if (this.dotOut != null) {
            this.dotOut.onBacktrace(this, this.positions, this.lastBackTracePos, endPosData, fromIDX, fragment, this.end);
        }
        // Local cursor; shadows the field of the same name.
        int pos = endPos;
        int bestIDX = fromIDX;
        while (pos > this.lastBackTracePos) {
            Position posData = this.positions.get(pos);
            assert (bestIDX < posData.count);
            int backPos = posData.backPos[bestIDX];
            int backWordPos = posData.backWordPos[bestIDX];
            assert (backPos >= this.lastBackTracePos) : "backPos=" + backPos + " vs lastBackTracePos=" + this.lastBackTracePos;
            // Word length measured from backWordPos, i.e. without the characters
            // that parse() skipped between backPos and the start of the word.
            int length = pos - backWordPos;
            Type backType = posData.backType[bestIDX];
            int backID = posData.backID[bestIDX];
            int nextBestIDX = posData.backIndex[bestIDX];
            // Start of the word relative to the fragment.
            int fragmentOffset = backWordPos - this.lastBackTracePos;
            assert (fragmentOffset >= 0);
            Dictionary dict = this.getDict(backType);
            if (this.outputUnknownUnigrams && backType == Type.UNKNOWN) {
                // Split the unknown word into one token per code point, last one first.
                for (int i = length - 1; i >= 0; --i) {
                    DictionaryToken token;
                    int charLen = 1;
                    // Keep a surrogate pair together in a single two-char token.
                    if (i > 0 && Character.isLowSurrogate(fragment[fragmentOffset + i])) {
                        --i;
                        charLen = 2;
                    }
                    // Tokens starting with punctuation are dropped.
                    if (this.shouldFilterToken(token = new DictionaryToken(Type.UNKNOWN, this.unkDictionary, CharacterDefinition.NGRAM, fragment, fragmentOffset + i, charLen, backWordPos + i, backWordPos + i + charLen))) continue;
                    this.pending.add(token);
                }
            } else {
                DictionaryToken token = new DictionaryToken(backType, dict, backID, fragment, fragmentOffset, length, backWordPos, backWordPos + length);
                if (token.getPOSType() == POS.Type.MORPHEME || this.mode == DecompoundMode.NONE) {
                    // Plain morpheme, or decompounding disabled: emit as-is unless it starts with punctuation.
                    if (!this.shouldFilterToken(token)) {
                        this.pending.add(token);
                    }
                } else {
                    Dictionary.Morpheme[] morphemes = token.getMorphemes();
                    if (morphemes == null) {
                        // Nothing to expand; note that this path does not apply the punctuation filter.
                        this.pending.add(token);
                    } else {
                        int endOffset = backWordPos + length;
                        int posLen = 0;
                        // Emit the morphemes, last one first.
                        for (int i = morphemes.length - 1; i >= 0; --i) {
                            DecompoundToken compoundToken;
                            Dictionary.Morpheme morpheme = morphemes[i];
                            if (token.getPOSType() == POS.Type.COMPOUND) {
                                // COMPOUND: each part gets offsets derived from its surface form length.
                                assert (endOffset - morpheme.surfaceForm.length() >= 0);
                                compoundToken = new DecompoundToken(morpheme.posTag, morpheme.surfaceForm, endOffset - morpheme.surfaceForm.length(), endOffset);
                            } else {
                                // Other types: every part shares the offsets of the whole token.
                                compoundToken = new DecompoundToken(morpheme.posTag, morpheme.surfaceForm, token.getStartOffset(), token.getEndOffset());
                            }
                            // In MIXED mode the first part gets position increment 0; the
                            // whole token is appended after the parts below.
                            if (i == 0 && this.mode == DecompoundMode.MIXED) {
                                compoundToken.setPositionIncrement(0);
                            }
                            ++posLen;
                            endOffset -= morpheme.surfaceForm.length();
                            this.pending.add(compoundToken);
                        }
                        if (this.mode == DecompoundMode.MIXED) {
                            // The original token spans all of its parts.
                            token.setPositionLength(Math.max(1, posLen));
                            this.pending.add(token);
                        }
                    }
                }
            }
            // Step to the previous word on the path.
            pos = backPos;
            bestIDX = nextBestIDX;
        }
        this.lastBackTracePos = endPos;
        // Everything before endPos has been turned into tokens and can be released.
        this.buffer.freeBefore(endPos);
        this.positions.freeBefore(endPos);
    }

    /**
     * Looks up the dictionary registered for a token type.
     *
     * @param type token type to resolve
     * @return the dictionary mapped to {@code type}; {@code null} for {@code Type.USER}
     *         when no user dictionary was configured
     */
    Dictionary getDict(Type type) {
        return dictionaryMap.get(type);
    }

    /**
     * Decides whether a token is dropped from the output.
     *
     * @param token candidate token
     * @return {@code true} when the token's first character is punctuation
     */
    private boolean shouldFilterToken(Token token) {
        final char[] surface = token.getSurfaceForm();
        final char first = surface[token.getOffset()];
        return KoreanTokenizer.isPunctuation(first);
    }

    /**
     * Tests a character for punctuation, using its own Unicode general category.
     *
     * @param ch character to test
     * @return {@code true} if the two-argument overload classifies it as punctuation
     */
    private static boolean isPunctuation(char ch) {
        final int category = Character.getType(ch);
        return KoreanTokenizer.isPunctuation(ch, category);
    }

    /**
     * Tests a character for punctuation given its Unicode general category.
     *
     * @param ch  character to test
     * @param cid general category of {@code ch}, as returned by {@link Character#getType(char)}
     * @return {@code true} for U+318D, and for any separator, control, format,
     *         punctuation or symbol category
     */
    private static boolean isPunctuation(char ch, int cid) {
        // U+318D is always treated as punctuation, whatever its category.
        if (ch == 0x318D) {
            return true;
        }
        switch (cid) {
            case Character.SPACE_SEPARATOR:
            case Character.LINE_SEPARATOR:
            case Character.PARAGRAPH_SEPARATOR:
            case Character.CONTROL:
            case Character.FORMAT:
            case Character.DASH_PUNCTUATION:
            case Character.START_PUNCTUATION:
            case Character.END_PUNCTUATION:
            case Character.CONNECTOR_PUNCTUATION:
            case Character.OTHER_PUNCTUATION:
            case Character.MATH_SYMBOL:
            case Character.CURRENCY_SYMBOL:
            case Character.MODIFIER_SYMBOL:
            case Character.OTHER_SYMBOL:
            case Character.INITIAL_QUOTE_PUNCTUATION:
            case Character.FINAL_QUOTE_PUNCTUATION:
                return true;
            default:
                return false;
        }
    }

    /**
     * Tells whether a script is one of the two script-neutral values.
     *
     * @param script script to test
     * @return {@code true} for {@code COMMON} or {@code INHERITED}
     */
    private static boolean isCommonOrInherited(Character.UnicodeScript script) {
        if (script == Character.UnicodeScript.COMMON) {
            return true;
        }
        return script == Character.UnicodeScript.INHERITED;
    }

    /**
     * Tells whether two scripts may appear in the same unknown word.
     *
     * @param scriptOne first script
     * @param scriptTwo second script
     * @return {@code true} if both are equal, or if either one is Common or Inherited
     */
    private static boolean isSameScript(Character.UnicodeScript scriptOne, Character.UnicodeScript scriptTwo) {
        if (scriptOne == scriptTwo) {
            return true;
        }
        if (KoreanTokenizer.isCommonOrInherited(scriptOne)) {
            return true;
        }
        return KoreanTokenizer.isCommonOrInherited(scriptTwo);
    }

    /**
     * Circular buffer of {@link Position} objects covering a sliding window of
     * input positions. Positions are created on demand by {@link #get(int)} and
     * released from the low end by {@link #freeBefore(int)}; the backing array
     * grows when the window no longer fits.
     */
    static final class WrappedPositionArray {
        private Position[] positions = new Position[8];
        // Array index the next new position will be written to.
        private int nextWrite;
        // Input position the next new slot will represent.
        private int nextPos;
        // Number of live positions currently held.
        private int count;

        public WrappedPositionArray() {
            for (int i = 0; i < this.positions.length; ++i) {
                this.positions[i] = new Position();
            }
        }

        /** Resets every live position and rewinds the window to position 0. */
        public void reset() {
            // Walk backwards from the most recently written slot, wrapping at index 0.
            --this.nextWrite;
            while (this.count > 0) {
                if (this.nextWrite == -1) {
                    this.nextWrite = this.positions.length - 1;
                }
                this.positions[this.nextWrite--].reset();
                --this.count;
            }
            this.nextWrite = 0;
            this.nextPos = 0;
            this.count = 0;
        }

        /**
         * Returns the {@link Position} for {@code pos}, creating it and every
         * missing position before it if {@code pos} lies beyond the window.
         *
         * @param pos input position; must not be below the low end of the window
         * @return the position object whose {@code pos} field equals {@code pos}
         */
        public Position get(int pos) {
            while (pos >= this.nextPos) {
                if (this.count == this.positions.length) {
                    // Full: grow and unroll so the oldest live entry lands at index 0.
                    Position[] newPositions = new Position[ArrayUtil.oversize((int)(1 + this.count), (int)RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
                    System.arraycopy(this.positions, this.nextWrite, newPositions, 0, this.positions.length - this.nextWrite);
                    System.arraycopy(this.positions, 0, newPositions, this.positions.length - this.nextWrite, this.nextWrite);
                    for (int i = this.positions.length; i < newPositions.length; ++i) {
                        newPositions[i] = new Position();
                    }
                    this.nextWrite = this.positions.length;
                    this.positions = newPositions;
                }
                if (this.nextWrite == this.positions.length) {
                    this.nextWrite = 0;
                }
                // The slot must have been reset when it was freed.
                assert (this.positions[this.nextWrite].count == 0);
                // Stamp the slot being claimed with the position it represents,
                // then advance both cursors. The index must be the pre-increment
                // value: after the increment it may equal positions.length.
                this.positions[this.nextWrite++].pos = this.nextPos++;
                ++this.count;
            }
            assert (this.inBounds(pos));
            int index = this.getIndex(pos);
            assert (this.positions[index].pos == pos);
            return this.positions[index];
        }

        /** Returns the first input position that has not been created yet. */
        public int getNextPos() {
            return this.nextPos;
        }

        // True when pos lies inside the live window [nextPos - count, nextPos).
        private boolean inBounds(int pos) {
            return pos < this.nextPos && pos >= this.nextPos - this.count;
        }

        // Maps an input position to its array index, wrapping around the array start.
        private int getIndex(int pos) {
            int index = this.nextWrite - (this.nextPos - pos);
            if (index < 0) {
                index += this.positions.length;
            }
            return index;
        }

        /**
         * Resets and releases every live position below {@code pos}.
         *
         * @param pos lowest input position to keep
         */
        public void freeBefore(int pos) {
            int toFree = this.count - (this.nextPos - pos);
            assert (toFree >= 0);
            assert (toFree <= this.count);
            // Index of the oldest live entry.
            int index = this.nextWrite - this.count;
            if (index < 0) {
                index += this.positions.length;
            }
            for (int i = 0; i < toFree; ++i) {
                if (index == this.positions.length) {
                    index = 0;
                }
                this.positions[index].reset();
                ++index;
            }
            this.count -= toFree;
        }
    }

    /**
     * Lattice node for one input position. It stores, in parallel arrays, every
     * path that arrives at this position; entry {@code i} of each array describes
     * the same path.
     */
    static final class Position {
        // Input position this node represents.
        int pos;
        // Number of valid entries in the parallel arrays below.
        int count;
        int[] costs = new int[8];
        int[] lastRightID = new int[8];
        int[] backPos = new int[8];
        int[] backWordPos = new int[8];
        int[] backIndex = new int[8];
        int[] backID = new int[8];
        Type[] backType = new Type[8];

        Position() {
        }

        /** Enlarges all parallel arrays so they can hold at least one more entry. */
        public void grow() {
            final int minSize = 1 + count;
            costs = ArrayUtil.grow(costs, minSize);
            lastRightID = ArrayUtil.grow(lastRightID, minSize);
            backPos = ArrayUtil.grow(backPos, minSize);
            backWordPos = ArrayUtil.grow(backWordPos, minSize);
            backIndex = ArrayUtil.grow(backIndex, minSize);
            backID = ArrayUtil.grow(backID, minSize);
            // Sized from backID so the Type array keeps exactly the same length as the int arrays.
            backType = Arrays.copyOf(backType, backID.length);
        }

        /**
         * Appends one arriving path.
         *
         * @param cost        total cost of the path up to this position
         * @param lastRightID right id of the last word on the path
         * @param backPos     position the last word's edge left from
         * @param backRPos    position of the last word's first character
         * @param backIndex   index of the predecessor entry at {@code backPos}
         * @param backID      word id of the last word
         * @param backType    dictionary type of the last word
         */
        public void add(int cost, int lastRightID, int backPos, int backRPos, int backIndex, int backID, Type backType) {
            if (count == costs.length) {
                grow();
            }
            final int slot = count;
            this.costs[slot] = cost;
            this.lastRightID[slot] = lastRightID;
            this.backPos[slot] = backPos;
            this.backWordPos[slot] = backRPos;
            this.backIndex[slot] = backIndex;
            this.backID[slot] = backID;
            this.backType[slot] = backType;
            count = slot + 1;
        }

        /** Drops all entries; the arrays keep their capacity. */
        public void reset() {
            count = 0;
        }
    }

    /**
     * Controls how {@code backtrace} emits a token whose POS type is not
     * {@code MORPHEME} and that has morphemes to expand.
     */
    public static enum DecompoundMode {
        /** The token is emitted as-is; its morphemes are not expanded. */
        NONE,
        /** Only the morphemes are emitted; the original token is dropped. */
        DISCARD,
        /** The morphemes are emitted together with the original token. */
        MIXED;

    }

    /** Identifies which dictionary a lattice entry or token came from. */
    public static enum Type {
        /** Found in the system dictionary ({@code TokenInfoDictionary}). */
        KNOWN,
        /** Produced from the unknown-word dictionary ({@code UnknownDictionary}). */
        UNKNOWN,
        /** Found in the user dictionary passed to the constructor. */
        USER;

    }
}

