/*
 * Decompiled with CFR 0.152.
 */
package org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers;

import org.apache.hyracks.storage.am.lsm.invertedindex.fulltext.TokenizerCategory;
import org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.AbstractUTF8StringBinaryTokenizer;
import org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.ITokenFactory;
import org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.TokenizerInfo;
import org.apache.hyracks.util.string.UTF8StringUtil;

/**
 * Tokenizer that splits a UTF-8 encoded sentence into tokens at separator
 * characters (see {@link #isSeparator(char)}).
 *
 * <p>Each call to {@link #next()} consumes one maximal run of non-separator
 * characters. Unless {@code ignoreTokenCount} is set, the tokenizer also
 * reports, for every token, how many times the same token (compared
 * case-insensitively) has been seen so far in the current sentence.
 */
public class DelimitedUTF8StringBinaryTokenizer
extends AbstractUTF8StringBinaryTokenizer {
    /**
     * Before {@link #getTokensCount()} has been called: number of tokens produced
     * by {@link #next()} since the last reset. Afterwards: the cached total
     * number of tokens in the sentence.
     */
    protected short tokenCount;
    /** True once {@link #tokenCount} holds the cached total for the current sentence. */
    private boolean tokenCountCalculated;
    /** Value of {@code byteIndex} right after the last reset; where counting starts. */
    private int originalIndex;

    /**
     * Creates a tokenizer; all arguments are forwarded to the superclass.
     *
     * @param ignoreTokenCount when true, {@link #next()} skips duplicate detection
     *                         and always reports a per-token count of 1
     * @param sourceHasTypeTag forwarded to the superclass
     * @param tokenFactory     forwarded to the superclass
     */
    public DelimitedUTF8StringBinaryTokenizer(boolean ignoreTokenCount, boolean sourceHasTypeTag, ITokenFactory tokenFactory) {
        super(ignoreTokenCount, sourceHasTypeTag, tokenFactory);
    }

    /**
     * Points the tokenizer at a new sentence and clears all counting state.
     *
     * @param sentenceData buffer holding the sentence
     * @param start        start offset passed to the superclass
     * @param length       length passed to the superclass
     */
    @Override
    public void reset(byte[] sentenceData, int start, int length) {
        super.reset(sentenceData, start, length);
        tokenCount = 0;
        tokenCountCalculated = false;
        // Remember where the superclass positioned the cursor so that
        // getTokensCount() can scan the sentence independently of next().
        originalIndex = byteIndex;
    }

    /**
     * Skips any separators at the current position.
     *
     * @return true if a non-separator character remains before the end of the sentence
     */
    @Override
    public boolean hasNext() {
        while (byteIndex < sentenceEndOffset
                && isSeparator(UTF8StringUtil.charAt(sentenceBytes, byteIndex))) {
            byteIndex += UTF8StringUtil.charSize(sentenceBytes, byteIndex);
        }
        return byteIndex < sentenceEndOffset;
    }

    /**
     * Tells whether a character delimits tokens.
     *
     * @param c character to classify
     * @return true unless {@code c} is a letter or digit, or has general category
     *         {@link Character#OTHER_LETTER} or {@link Character#OTHER_NUMBER}
     */
    public static boolean isSeparator(char c) {
        if (Character.isLetterOrDigit(c)) {
            return false;
        }
        int type = Character.getType(c);
        return type != Character.OTHER_LETTER && type != Character.OTHER_NUMBER;
    }

    /**
     * Consumes the run of non-separator characters starting at the current
     * position and loads it into {@code token}.
     *
     * <p>When token counting is enabled and the token is non-empty, the count
     * handed to the token is one plus the number of earlier tokens in this
     * sentence that are equal ignoring case; the token's position is then
     * recorded for later comparisons.
     */
    @Override
    public void next() {
        int tokenLength = 0; // length in characters, not bytes
        int currentTokenStart = byteIndex;
        while (byteIndex < sentenceEndOffset
                && !isSeparator(UTF8StringUtil.charAt(sentenceBytes, byteIndex))) {
            byteIndex += UTF8StringUtil.charSize(sentenceBytes, byteIndex);
            tokenLength++;
        }

        int curTokenCount = 1;
        if (tokenLength > 0 && !ignoreTokenCount) {
            for (int i = 0; i < tokensStart.length(); i++) {
                // Cheap length check first; only same-length tokens can match.
                if (tokenLength == tokensLength.get(i)
                        && equalsIgnoreCase(currentTokenStart, tokensStart.get(i), tokenLength)) {
                    curTokenCount++;
                }
            }
            tokensStart.add(currentTokenStart);
            tokensLength.add(tokenLength);
        }

        token.reset(sentenceBytes, currentTokenStart, byteIndex, tokenLength, curTokenCount);
        // Once the sentence total has been cached by getTokensCount(), it must
        // not be inflated by further iteration.
        if (!tokenCountCalculated) {
            tokenCount++;
        }
    }

    /**
     * Compares two character runs inside {@code sentenceBytes}, ignoring case.
     *
     * <p>Each run keeps its own byte offset: two characters that are equal after
     * lower-casing may still have different encoded sizes, so a shared offset
     * could drift out of alignment.
     *
     * @param firstStart  byte offset of the first run
     * @param secondStart byte offset of the second run
     * @param charCount   number of characters to compare in each run
     * @return true if all {@code charCount} character pairs match after lower-casing
     */
    private boolean equalsIgnoreCase(int firstStart, int secondStart, int charCount) {
        int firstPos = firstStart;
        int secondPos = secondStart;
        for (int n = 0; n < charCount; n++) {
            char a = UTF8StringUtil.charAt(sentenceBytes, firstPos);
            char b = UTF8StringUtil.charAt(sentenceBytes, secondPos);
            if (Character.toLowerCase(a) != Character.toLowerCase(b)) {
                return false;
            }
            firstPos += UTF8StringUtil.charSize(sentenceBytes, firstPos);
            secondPos += UTF8StringUtil.charSize(sentenceBytes, secondPos);
        }
        return true;
    }

    /**
     * Counts the tokens in the current sentence, scanning from the position
     * recorded at the last reset.
     *
     * <p>The result is computed once per sentence and cached, so repeated calls
     * return the same value regardless of how far iteration has progressed.
     *
     * @return number of maximal non-separator runs in the sentence
     */
    @Override
    public short getTokensCount() {
        if (!tokenCountCalculated) {
            short total = 0;
            boolean previousCharIsSeparator = true;
            // Scan with a local cursor so originalIndex stays valid for the
            // lifetime of the sentence.
            int scanIndex = originalIndex;
            while (scanIndex < sentenceEndOffset) {
                if (isSeparator(UTF8StringUtil.charAt(sentenceBytes, scanIndex))) {
                    previousCharIsSeparator = true;
                } else if (previousCharIsSeparator) {
                    // First character of a new token.
                    total++;
                    previousCharIsSeparator = false;
                }
                scanIndex += UTF8StringUtil.charSize(sentenceBytes, scanIndex);
            }
            tokenCount = total;
            tokenCountCalculated = true;
        }
        return tokenCount;
    }

    /** @return {@link TokenizerInfo.TokenizerType#STRING} */
    @Override
    public TokenizerInfo.TokenizerType getTokenizerType() {
        return TokenizerInfo.TokenizerType.STRING;
    }

    /** @return {@link TokenizerCategory#WORD} */
    @Override
    public TokenizerCategory getTokenizerCategory() {
        return TokenizerCategory.WORD;
    }
}

