/*
 * Decompiled with CFR 0.152.
 */
package org.apache.sysds.runtime.transform.tokenize.applier;

import java.util.Arrays;
import java.util.List;
import org.apache.sysds.common.Types;
import org.apache.sysds.runtime.frame.data.FrameBlock;
import org.apache.sysds.runtime.transform.tokenize.DocumentRepresentation;
import org.apache.sysds.runtime.transform.tokenize.Token;
import org.apache.sysds.runtime.transform.tokenize.applier.TokenizerApplier;
import org.apache.sysds.runtime.util.UtilFunctions;

/**
 * Tokenizer applier that writes tokens into an output {@link FrameBlock},
 * either one token per row together with a position value (long format) or
 * one document per row with one token per column (wide format).
 *
 * <p>The fields {@code numIdCols}, {@code maxTokens}, {@code wideFormat} and
 * {@code applyPadding} as well as the helpers {@code setKeys},
 * {@code getOutputRow}, {@code applyPaddingLong} and {@code applyPaddingWide}
 * are inherited from {@link TokenizerApplier}; their exact semantics are not
 * visible in this file.
 */
public class TokenizerApplierPosition extends TokenizerApplier {
    private static final long serialVersionUID = 3563407270742660830L;

    /**
     * Creates the applier by forwarding all settings to the superclass.
     *
     * @param numIdCols    number of identifier columns in the output
     * @param maxTokens    upper bound on the tokens emitted per document
     * @param wideFormat   {@code true} for wide output, {@code false} for long output
     * @param applyPadding whether documents are padded up to {@code maxTokens}
     */
    public TokenizerApplierPosition(int numIdCols, int maxTokens, boolean wideFormat, boolean applyPadding) {
        super(numIdCols, maxTokens, wideFormat, applyPadding);
    }

    /**
     * Computes the number of output rows needed for the given documents.
     *
     * <p>Wide format needs one row per document. Long format needs, per
     * document, {@code maxTokens} rows when padding is enabled and otherwise
     * the token count capped at {@code maxTokens}.
     *
     * @param internalRepresentation the tokenized documents
     * @return the total number of output rows
     */
    @Override
    public int getNumRows(DocumentRepresentation[] internalRepresentation) {
        if (this.wideFormat) {
            return internalRepresentation.length;
        }
        int totalRows = 0;
        for (DocumentRepresentation document : internalRepresentation) {
            // With padding every document occupies exactly maxTokens rows,
            // so the document itself is not inspected in that case.
            totalRows += this.applyPadding
                ? this.maxTokens
                : Math.min(document.tokens.size(), this.maxTokens);
        }
        return totalRows;
    }

    /**
     * Writes the documents of one input block into {@code out}.
     *
     * @param internalRepresentation the tokenized documents
     * @param out                    the frame receiving the output
     * @param inputRowStart          index of the first document to process
     * @param blk                    block size handed to {@code UtilFunctions.getEndIndex}
     * @return the output row index following the last row written
     */
    @Override
    public int applyInternalRepresentation(DocumentRepresentation[] internalRepresentation, FrameBlock out, int inputRowStart, int blk) {
        int lastExclusive = UtilFunctions.getEndIndex(internalRepresentation.length, inputRowStart, blk);
        // Inherited helper; presumably maps the first input row of this block
        // to its first output row -- confirm against TokenizerApplier.
        int nextRow = this.getOutputRow(inputRowStart, internalRepresentation);
        for (int docIdx = inputRowStart; docIdx < lastExclusive; ++docIdx) {
            DocumentRepresentation document = internalRepresentation[docIdx];
            List<Object> keys = document.keys;
            List<Token> tokenList = document.tokens;
            if (this.wideFormat) {
                nextRow = this.appendTokensWide(nextRow, keys, tokenList, out);
            } else {
                nextRow = this.appendTokensLong(nextRow, keys, tokenList, out);
            }
        }
        return nextRow;
    }

    /**
     * Appends one document in long format: one output row per token, at most
     * {@code maxTokens} rows, each holding the keys, {@code getStartIndex(0) + 1}
     * of the token, and the token text.
     *
     * @param row       first output row to write
     * @param keys      key values of the document
     * @param tokenList tokens of the document
     * @param out       the frame receiving the output
     * @return the output row index following the last row written (including
     *         rows produced by {@code applyPaddingLong} when padding is enabled)
     */
    public int appendTokensLong(int row, List<Object> keys, List<Token> tokenList, FrameBlock out) {
        int currentRow = row;
        int written = 0;
        for (Token current : tokenList) {
            if (written >= this.maxTokens) {
                break;
            }
            // setKeys is inherited; its return value is used as the column
            // that receives the position, with the token text right after it.
            int positionCol = this.setKeys(currentRow, keys, out);
            int textCol = positionCol + 1;
            // NOTE(review): the +1 presumably turns a 0-based start index
            // into a 1-based position -- confirm against Token.
            out.set(currentRow, positionCol, current.getStartIndex(0) + 1L);
            out.set(currentRow, textCol, current.toString());
            currentRow++;
            written++;
        }
        if (!this.applyPadding) {
            return currentRow;
        }
        return this.applyPaddingLong(currentRow, written, keys, out, -1, "");
    }

    /**
     * Appends one document in wide format: a single output row holding the
     * keys followed by up to {@code maxTokens} token texts, one per column.
     *
     * @param row       output row to write
     * @param keys      key values of the document
     * @param tokenList tokens of the document
     * @param out       the frame receiving the output
     * @return {@code row + 1}
     */
    public int appendTokensWide(int row, List<Object> keys, List<Token> tokenList, FrameBlock out) {
        // setKeys is inherited; its return value is used as the column of the
        // first token.
        int firstTokenCol = this.setKeys(row, keys, out);
        int written = 0;
        while (written < tokenList.size() && written < this.maxTokens) {
            out.set(row, firstTokenCol + written, tokenList.get(written).toString());
            written++;
        }
        if (this.applyPadding) {
            this.applyPaddingWide(row, firstTokenCol, written, out, "");
        }
        return row + 1;
    }

    /**
     * Returns the output schema matching the configured format.
     *
     * @return {@code numIdCols + maxTokens} STRING columns for wide format;
     *         for long format {@code numIdCols + 2} columns, all STRING except
     *         the column at index {@code numIdCols}, which is INT64
     */
    @Override
    public Types.ValueType[] getOutSchema() {
        return this.wideFormat
            ? TokenizerApplierPosition.getOutSchemaWide(this.numIdCols, this.maxTokens)
            : TokenizerApplierPosition.getOutSchemaLong(this.numIdCols);
    }

    /** Builds the wide schema: {@code numIdCols + maxTokens} STRING columns. */
    private static Types.ValueType[] getOutSchemaWide(int numIdCols, int maxTokens) {
        int width = numIdCols + maxTokens;
        return UtilFunctions.nCopies(width, Types.ValueType.STRING);
    }

    /**
     * Builds the long schema: {@code numIdCols + 2} STRING columns with the
     * column at index {@code numIdCols} switched to INT64.
     */
    private static Types.ValueType[] getOutSchemaLong(int numIdCols) {
        Types.ValueType[] longSchema = UtilFunctions.nCopies(numIdCols + 2, Types.ValueType.STRING);
        // The column directly after the id columns holds the numeric position.
        longSchema[numIdCols] = Types.ValueType.INT64;
        return longSchema;
    }
}

