diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalUtil.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalUtil.java index db040f1..70a9a9c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalUtil.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalUtil.java @@ -30,7 +30,7 @@ public class DecimalUtil { public static int compare(HiveDecimalWritable writableLeft, HiveDecimal right) { - return writableLeft.getHiveDecimal().compareTo(right); + return writableLeft.compareTo(right); } public static int compare(HiveDecimal left, HiveDecimalWritable writableRight) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColColumn.java index 55417cf..2992bff 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColColumn.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; @@ -47,6 +49,13 @@ public ListIndexColColumn(int listColumnNum, int indexColumnNum, int outputColum @Override public void evaluate(VectorizedRowBatch batch) throws HiveException { + + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -56,48 +65,459 @@ public void evaluate(VectorizedRowBatch batch) throws HiveException { ColumnVector childV = listV.child; LongColumnVector indexColumnVector = (LongColumnVector) batch.cols[indexColumnNum]; long[] indexV = indexColumnVector.vector; + int[] sel = batch.selected; + boolean[] indexIsNull = indexColumnVector.isNull; + boolean[] listIsNull = listV.isNull; + boolean[] outputIsNull = outV.isNull; // We do not need to do a column reset since we are carefully changing the output. outV.isRepeating = false; + /* + * List indices are 0-based. + * + * Do careful maintenance of the outputColVector.noNulls flag since the index may be + * out-of-bounds. + */ + + if (indexColumnVector.isRepeating) { + + /* + * Repeated index or repeated NULL index. + */ + if (indexColumnVector.noNulls || !indexIsNull[0]) { + final long repeatedLongIndex = indexV[0]; + if (repeatedLongIndex < 0) { + + // Invalid index for entire batch. + outputIsNull[0] = true; + outV.noNulls = false; + outV.isRepeating = true; + return; + } + + /* + * Same INDEX for entire batch. Still need to validate the LIST upper limit. + */ + if (listV.isRepeating) { + if (listV.noNulls || !listIsNull[0]) { + final long repeatedLongListLength = listV.lengths[0]; + if (repeatedLongIndex >= repeatedLongListLength) { + outV.isNull[0] = true; + outV.noNulls = false; + } else { + outV.isNull[0] = false; + outV.setElement(0, (int) (listV.offsets[0] + repeatedLongIndex), childV); + } + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + /* + * Individual row processing for LIST vector with *repeated* INDEX instance. + */ + if (listV.noNulls) { + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. 
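+          // When the output vector may already contain nulls, isNull must be cleared per row as
+          // values are written; the ELSE loop below assumes isNull is still all false and skips
+          // that per-row store.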
+ + if (!outV.noNulls) { + for (int j = 0; j < n; j++) { + final int i = sel[j]; + final long longListLength = listV.lengths[i]; + if (repeatedLongIndex >= longListLength) { + outV.isNull[i] = true; + outV.noNulls = false; + } else { + outV.isNull[i] = false; + outV.setElement(i, (int) (listV.offsets[i] + repeatedLongIndex), childV); + } + } + } else { + for (int j = 0; j < n; j++) { + final int i = sel[j]; + final long longListLength = listV.lengths[i]; + if (repeatedLongIndex >= longListLength) { + outV.isNull[i] = true; + outV.noNulls = false; + } else { + outV.setElement(i, (int) (listV.offsets[i] + repeatedLongIndex), childV); + } + } + } + } else { + if (!outV.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outV.noNulls = true; + } + for (int i = 0; i < n; i++) { + final long longListLength = listV.lengths[i]; + if (repeatedLongIndex >= longListLength) { + outV.isNull[i] = true; + outV.noNulls = false; + } else { + outV.setElement(i, (int) (listV.offsets[i] + repeatedLongIndex), childV); + } + } + } + } else /* there are NULLs in the LIST */ { + + if (batch.selectedInUse) { + for (int j=0; j != n; j++) { + int i = sel[j]; + if (!listIsNull[i]) { + final long longListLength = listV.lengths[i]; + if (repeatedLongIndex >= longListLength) { + outV.isNull[i] = true; + outV.noNulls = false; + } else { + outV.isNull[i] = false; + outV.setElement(i, (int) (listV.offsets[i] + repeatedLongIndex), childV); + } + } else { + outputIsNull[i] = true; + outV.noNulls = false; + } + } + } else { + for (int i = 0; i != n; i++) { + if (!listIsNull[i]) { + final long longListLength = listV.lengths[i]; + if (repeatedLongIndex >= longListLength) { + outV.isNull[i] = true; + outV.noNulls = false; + } else { + outV.isNull[i] = false; + outV.setElement(i, (int) (listV.offsets[i] + repeatedLongIndex), childV); + } + } else { + outputIsNull[i] = true; + outV.noNulls = false; + } + } + } + } + } else { + outputIsNull[0] = true; + outV.noNulls = false; + outV.isRepeating = true; + } + return; + } + + // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + /* + * Same LIST for entire batch. Still need to validate the LIST upper limit against varing + * INDEX. + * + * (Repeated INDEX case handled above). + */ + if (listV.isRepeating) { - if (listV.isNull[0]) { - outV.isNull[0] = true; + if (listV.noNulls || !listIsNull[0]) { + + /* + * Individual row processing for INDEX vector with *repeated* LIST value. + */ + final long repeatedLongListOffset = listV.offsets[0]; + final long repeatedLongListLength = listV.lengths[0]; + + if (indexColumnVector.noNulls) { + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outV.noNulls) { + for (int j = 0; j != n; j++) { + final int i = sel[j]; + final long longIndex = indexV[i]; + if (longIndex < 0) { + + // Invalid index for entire batch. + outputIsNull[i] = true; + outV.noNulls = false; + } else { + if (longIndex >= repeatedLongListLength) { + outV.isNull[i] = true; + outV.noNulls = false; + } else { + outV.isNull[i] = false; + outV.setElement(i, (int) (repeatedLongListOffset + longIndex), childV); + } + } + } + } else { + for (int j = 0; j != n; j++) { + final int i = sel[j]; + final long longIndex = indexV[i]; + if (longIndex < 0) { + + // Invalid index for entire batch. 
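+                // (Here the index varies per row, so only this output row is marked NULL.)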
+ outputIsNull[i] = true; + outV.noNulls = false; + } else { + if (longIndex >= repeatedLongListLength) { + outV.isNull[i] = true; + outV.noNulls = false; + } else { + outV.setElement(i, (int) (repeatedLongListOffset + longIndex), childV); + } + } + } + } + } else { + if (!outV.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outV.noNulls = true; + } + for (int i = 0; i != n; i++) { + final long longIndex = indexV[i]; + if (longIndex < 0) { + + // Invalid index for entire batch. + outputIsNull[i] = true; + outV.noNulls = false; + } else { + if (longIndex >= repeatedLongListLength) { + outV.isNull[i] = true; + outV.noNulls = false; + } else { + outV.setElement(i, (int) (repeatedLongListOffset + longIndex), childV); + } + } + } + } + } else /* there are NULLs in the inputColVector */ { + + /* + * Do careful maintenance of the outV.noNulls flag. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + if (!indexIsNull[i]) { + final long longIndex = indexV[i]; + if (longIndex < 0) { + + // Invalid index for entire batch. + outputIsNull[i] = true; + outV.noNulls = false; + } else { + if (longIndex >= repeatedLongListLength) { + outV.isNull[i] = true; + outV.noNulls = false; + } else { + outV.isNull[i] = false; + outV.setElement(i, (int) (repeatedLongListOffset + longIndex), childV); + } + } + } else { + outputIsNull[i] = true; + outV.noNulls = false; + } + } + } else { + for(int i = 0; i != n; i++) { + if (!indexIsNull[i]) { + final long longIndex = indexV[i]; + if (longIndex < 0) { + + // Invalid index for entire batch. + outputIsNull[i] = true; + outV.noNulls = false; + } else { + if (longIndex >= repeatedLongListLength) { + outV.isNull[i] = true; + outV.noNulls = false; + } else { + outV.isNull[i] = false; + outV.setElement(i, (int) (repeatedLongListOffset + longIndex), childV); + } + } + } else { + outputIsNull[i] = true; + outV.noNulls = false; + } + } + } + } + } else { + outputIsNull[0] = true; outV.noNulls = false; outV.isRepeating = true; + } + return; + } + + // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + /* + * Individual row processing for INDEX vectors and LIST vectors. + */ + final boolean listNoNulls = listV.noNulls; + + if (indexColumnVector.noNulls) { + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outV.noNulls) { + for (int j = 0; j != n; j++) { + final int i = sel[j]; + final long longIndex = indexV[i]; + if (longIndex < 0) { + + // Invalid index for entire batch. + outputIsNull[i] = true; + outV.noNulls = false; + } else { + if (listNoNulls || !listIsNull[i]) { + final long longListLength = listV.lengths[i]; + if (longIndex >= longListLength) { + outV.isNull[i] = true; + outV.noNulls = false; + } else { + outV.isNull[i] = false; + outV.setElement(i, (int) (listV.offsets[i] + longIndex), childV); + } + } else { + outputIsNull[i] = true; + outV.noNulls = false; + } + } + } + } else { + for (int j = 0; j != n; j++) { + final int i = sel[j]; + final long longIndex = indexV[i]; + if (longIndex < 0) { + + // Invalid index for entire batch. 
+ outputIsNull[i] = true; + outV.noNulls = false; + } else { + if (listNoNulls || !listIsNull[i]) { + final long longListLength = listV.lengths[i]; + if (longIndex >= longListLength) { + outV.isNull[i] = true; + outV.noNulls = false; + } else { + outV.setElement(i, (int) (listV.offsets[i] + longIndex), childV); + } + } else { + outputIsNull[i] = true; + outV.noNulls = false; + } + } + } + } } else { - if (indexColumnVector.isRepeating) { - if (indexV[0] >= listV.lengths[0]) { - outV.isNull[0] = true; + if (!outV.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outV.noNulls = true; + } + for (int i = 0; i != n; i++) { + final long longIndex = indexV[i]; + if (longIndex < 0) { + + // Invalid index for entire batch. + outputIsNull[i] = true; outV.noNulls = false; } else { - outV.isNull[0] = false; - outV.setElement(0, (int) (listV.offsets[0] + indexV[0]), childV); + if (listNoNulls || !listIsNull[i]) { + final long longListLength = listV.lengths[i]; + if (longIndex >= longListLength) { + outV.isNull[i] = true; + outV.noNulls = false; + } else { + outV.setElement(i, (int) (listV.offsets[i] + longIndex), childV); + } + } else { + outputIsNull[i] = true; + outV.noNulls = false; + } } - outV.isRepeating = true; - } else { - for (int i = 0; i < batch.size; i++) { - int j = (batch.selectedInUse) ? batch.selected[i] : i; - if (indexV[j] >= listV.lengths[0]) { - outV.isNull[j] = true; + } + } + } else /* there are NULLs in the inputColVector */ { + + /* + * Do careful maintenance of the outV.noNulls flag. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + if (!indexIsNull[i]) { + final long longIndex = indexV[i]; + if (longIndex < 0) { + + // Invalid index for entire batch. + outputIsNull[i] = true; outV.noNulls = false; } else { - outV.isNull[j] = false; - outV.setElement(j, (int) (listV.offsets[0] + indexV[j]), childV); - + if (listNoNulls || !listIsNull[i]) { + final long longListLength = listV.lengths[i]; + if (longIndex >= longListLength) { + outV.isNull[i] = true; + outV.noNulls = false; + } else { + outV.isNull[i] = false; + outV.setElement(i, (int) (listV.offsets[i] + longIndex), childV); + } + } else { + outputIsNull[i] = true; + outV.noNulls = false; + } } + } else { + outputIsNull[i] = true; + outV.noNulls = false; } } - } - } else { - for (int i = 0; i < batch.size; i++) { - int j = (batch.selectedInUse) ? batch.selected[i] : i; - if (listV.isNull[j] || indexV[j] >= listV.lengths[j]) { - outV.isNull[j] = true; - outV.noNulls = false; - } else { - outV.isNull[j] = false; - outV.setElement(j, (int) (listV.offsets[j] + indexV[j]), childV); + } else { + for(int i = 0; i != n; i++) { + if (!indexIsNull[i]) { + final long longIndex = indexV[i]; + if (longIndex < 0) { + + // Invalid index for entire batch. 
+ outputIsNull[i] = true; + outV.noNulls = false; + } else { + if (listNoNulls || !listIsNull[i]) { + final long longListLength = listV.lengths[i]; + if (longIndex >= longListLength) { + outV.isNull[i] = true; + outV.noNulls = false; + } else { + outV.isNull[i] = false; + outV.setElement(i, (int) (listV.offsets[i] + longIndex), childV); + } + } else { + outputIsNull[i] = true; + outV.noNulls = false; + } + } + } else { + outputIsNull[i] = true; + outV.noNulls = false; + } } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColScalar.java index 808e9fb..bb01c1c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColScalar.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -46,6 +48,13 @@ public ListIndexColScalar(int listColumn, int index, int outputColumnNum) { @Override public void evaluate(VectorizedRowBatch batch) throws HiveException { + + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -53,37 +62,128 @@ public void evaluate(VectorizedRowBatch batch) throws HiveException { ColumnVector outV = batch.cols[outputColumnNum]; ListColumnVector listV = (ListColumnVector) batch.cols[listColumnNum]; ColumnVector childV = listV.child; + int[] sel = batch.selected; + boolean[] listIsNull = listV.isNull; + boolean[] outputIsNull = outV.isNull; + + if (index < 0) { + outV.isNull[0] = true; + outV.noNulls = false; + outV.isRepeating = true; + return; + } + + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; /* * Do careful maintenance of the outputColVector.noNulls flag. */ if (listV.isRepeating) { - if (listV.isNull[0]) { - outV.isNull[0] = true; - outV.noNulls = false; - } else { - if (index >= listV.lengths[0]) { + if (listV.noNulls || !listIsNull[0]) { + final long repeatedLongListLength = listV.lengths[0]; + if (index >= repeatedLongListLength) { outV.isNull[0] = true; outV.noNulls = false; } else { outV.isNull[0] = false; outV.setElement(0, (int) (listV.offsets[0] + index), childV); } + } else { + outV.isNull[0] = true; + outV.noNulls = false; } outV.isRepeating = true; - } else { - for (int i = 0; i < batch.size; i++) { - int j = (batch.selectedInUse) ? batch.selected[i] : i; - if (listV.isNull[j] || index >= listV.lengths[j]) { - outV.isNull[j] = true; - outV.noNulls = false; + return; + } + + /* + * Individual row processing for LIST vector with scalar constant INDEX value. + */ + if (listV.noNulls) { + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. 
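+        // A negative scalar index was already rejected above, so each row only needs the
+        // per-row list length check.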
+ + if (!outV.noNulls) { + for (int j = 0; j < n; j++) { + final int i = sel[j]; + final long longListLength = listV.lengths[i]; + if (index >= longListLength) { + outV.isNull[i] = true; + outV.noNulls = false; + } else { + outV.isNull[i] = false; + outV.setElement(i, (int) (listV.offsets[i] + index), childV); + } + } } else { - outV.isNull[j] = false; - outV.setElement(j, (int) (listV.offsets[j] + index), childV); + for (int j = 0; j < n; j++) { + final int i = sel[j]; + final long longListLength = listV.lengths[i]; + if (index >= longListLength) { + outV.isNull[i] = true; + outV.noNulls = false; + } else { + outV.setElement(i, (int) (listV.offsets[i] + index), childV); + } + } + } + } else { + if (!outV.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outV.isNull, false); + outV.noNulls = true; + } + for (int i = 0; i < n; i++) { + final long longListLength = listV.lengths[i]; + if (index >= longListLength) { + outV.isNull[i] = true; + outV.noNulls = false; + } else { + outV.setElement(i, (int) (listV.offsets[i] + index), childV); + } + } + } + } else /* there are NULLs in the LIST */ { + + if (batch.selectedInUse) { + for (int j=0; j != n; j++) { + int i = sel[j]; + if (!listIsNull[i]) { + final long longListLength = listV.lengths[i]; + if (index >= longListLength) { + outV.isNull[i] = true; + outV.noNulls = false; + } else { + outV.isNull[i] = false; + outV.setElement(i, (int) (listV.offsets[i] + index), childV); + } + } else { + outputIsNull[i] = true; + outV.noNulls = false; + } + } + } else { + for (int i = 0; i != n; i++) { + if (!listIsNull[i]) { + final long longListLength = listV.lengths[i]; + if (index >= longListLength) { + outV.isNull[i] = true; + outV.noNulls = false; + } else { + outV.isNull[i] = false; + outV.setElement(i, (int) (listV.offsets[i] + index), childV); + } + } else { + outputIsNull[i] = true; + outV.noNulls = false; + } } } - outV.isRepeating = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBase.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBase.java deleted file mode 100644 index 3df4bce..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBase.java +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.hive.ql.exec.vector.expressions; - -import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; - -/** - * Superclass to support vectorized functions that take a parameter as key of Map - * and return the value of Map. - */ -public abstract class VectorUDFMapIndexBase extends VectorExpression { - - private static final long serialVersionUID = 1L; - - public VectorUDFMapIndexBase() { - super(); - } - - public VectorUDFMapIndexBase(int outputColumnNum) { - super(outputColumnNum); - } - - /** - * The index array of MapColumnVector is used to get the value from MapColumnVector based on the - * index, the following are the steps to get it: - * 1. Get the current key which is a scalar or from a ColumnVector. - * 2. Compare the current key and the key from MapColumnVector. - * 3. Set the index of MapColumnVector to the result array if the keys are same. - */ - protected int[] getMapValueIndex(MapColumnVector mapV, VectorizedRowBatch batch) { - int[] indexArray = new int[VectorizedRowBatch.DEFAULT_SIZE]; - for (int i = 0; i < batch.size; i++) { - boolean findKey = false; - int offset = (batch.selectedInUse) ? batch.selected[i] : i; - Object columnKey = getCurrentKey(offset); - for (int j = 0; j < mapV.lengths[offset]; j++) { - int index = (int)(mapV.offsets[offset] + j); - Object tempKey = getKeyByIndex(mapV.keys, index); - if (compareKey(columnKey, tempKey)) { - indexArray[offset] = j; - findKey = true; - break; - } - } - if (!findKey) { - indexArray[offset] = -1; - } - if (mapV.isRepeating) { - break; - } - } - return indexArray; - } - - protected boolean compareKey(Object columnKey, Object otherKey) { - if (columnKey == null && otherKey == null) { - return true; - } else if (columnKey != null && otherKey != null) { - return compareKeyInternal(columnKey, otherKey); - } else { - return false; - } - } - - protected boolean compareKeyInternal(Object columnKey, Object otherKey) { - return columnKey.equals(otherKey); - } - - abstract Object getKeyByIndex(ColumnVector cv, int index); - - abstract Object getCurrentKey(int index); -} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseCol.java index 157154a..67f4d55 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseCol.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -27,7 +29,7 @@ * Superclass to support vectorized functions that take a column value as key of Map * and return the value of Map. 
*/ -public abstract class VectorUDFMapIndexBaseCol extends VectorUDFMapIndexBase { +public abstract class VectorUDFMapIndexBaseCol extends VectorExpression { private static final long serialVersionUID = 1L; @@ -47,6 +49,13 @@ public VectorUDFMapIndexBaseCol(int mapColumnNum, int indexColumnNum, int output @Override public void evaluate(VectorizedRowBatch batch) throws HiveException { + + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -55,61 +64,380 @@ public void evaluate(VectorizedRowBatch batch) throws HiveException { MapColumnVector mapV = (MapColumnVector) batch.cols[mapColumnNum]; // indexColumnVector includes the keys of Map indexColumnVector = batch.cols[indexColumnNum]; + ColumnVector valuesV = mapV.values; + + int[] sel = batch.selected; + boolean[] indexIsNull = indexColumnVector.isNull; + boolean[] mapIsNull = mapV.isNull; + boolean[] outputIsNull = outV.isNull; + + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; /* * Do careful maintenance of the outputColVector.noNulls flag. */ - int[] mapValueIndex; + if (indexColumnVector.isRepeating) { + + /* + * Repeated index or repeated NULL index. + */ + if (indexColumnVector.noNulls || !indexIsNull[0]) { + + /* + * Same INDEX for entire batch. + */ + if (mapV.isRepeating) { + if (mapV.noNulls || !mapIsNull[0]) { + final int repeatedMapIndex = findInMap(indexColumnVector, 0, mapV, 0); + if (repeatedMapIndex == -1) { + outV.isNull[0] = true; + outV.noNulls = false; + } else { + outV.isNull[0] = false; + outV.setElement(0, repeatedMapIndex, valuesV); + } + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + /* + * Individual row processing for LIST vector with *repeated* INDEX value. + */ + if (mapV.noNulls) { + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outV.noNulls) { + for (int j = 0; j < n; j++) { + final int i = sel[j]; + final int mapIndex = findInMap(indexColumnVector, 0, mapV, i); + if (mapIndex == -1) { + outV.isNull[i] = true; + outV.noNulls = false; + } else { + outV.isNull[i] = false; + outV.setElement(i, mapIndex, valuesV); + } + } + } else { + for (int j = 0; j < n; j++) { + final int i = sel[j]; + final int mapIndex = findInMap(indexColumnVector, 0, mapV, i); + if (mapIndex == -1) { + outV.isNull[i] = true; + outV.noNulls = false; + } else { + outV.setElement(i, mapIndex, valuesV); + } + } + } + } else { + if (!outV.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. 
+ Arrays.fill(outputIsNull, false); + outV.noNulls = true; + } + for (int i = 0; i < n; i++) { + final int mapIndex = findInMap(indexColumnVector, 0, mapV, i); + if (mapIndex == -1) { + outV.isNull[i] = true; + outV.noNulls = false; + } else { + outV.setElement(i, mapIndex, valuesV); + } + } + } + } else /* there are NULLs in the LIST */ { + + if (batch.selectedInUse) { + for (int j=0; j != n; j++) { + int i = sel[j]; + if (!mapIsNull[i]) { + final int mapIndex = findInMap(indexColumnVector, 0, mapV, i); + if (mapIndex == -1) { + outV.isNull[i] = true; + outV.noNulls = false; + } else { + outV.isNull[i] = false; + outV.setElement(i, mapIndex, valuesV); + } + } else { + outputIsNull[i] = true; + outV.noNulls = false; + } + } + } else { + for (int i = 0; i != n; i++) { + if (!mapIsNull[i]) { + final int mapIndex = findInMap(indexColumnVector, 0, mapV, i); + if (mapIndex == -1) { + outV.isNull[i] = true; + outV.noNulls = false; + } else { + outV.isNull[i] = false; + outV.setElement(i, mapIndex, valuesV); + } + } else { + outputIsNull[i] = true; + outV.noNulls = false; + } + } + } + } + } else { + outputIsNull[0] = true; + outV.noNulls = false; + outV.isRepeating = true; + } + return; + } + + // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + /* + * Same MAP instance for entire batch. + * + * (Repeated INDEX case handled above). + */ + if (mapV.isRepeating) { - if (mapV.isNull[0]) { - outV.isNull[0] = true; + if (mapV.noNulls || !mapIsNull[0]) { + + /* + * Individual row processing for INDEX vector with *repeated* MAP instance. + */ + + if (indexColumnVector.noNulls) { + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outV.noNulls) { + for (int j = 0; j != n; j++) { + final int i = sel[j]; + final int mapIndex = findInMap(indexColumnVector, i, mapV, 0); + if (mapIndex == -1) { + outV.isNull[i] = true; + outV.noNulls = false; + } else { + outV.isNull[i] = false; + outV.setElement(i, mapIndex, valuesV); + } + } + } else { + for (int j = 0; j != n; j++) { + final int i = sel[j]; + final int mapIndex = findInMap(indexColumnVector, i, mapV, 0); + if (mapIndex == -1) { + outV.isNull[i] = true; + outV.noNulls = false; + } else { + outV.setElement(i, mapIndex, valuesV); + } + } + } + } else { + if (!outV.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outV.noNulls = true; + } + for (int i = 0; i != n; i++) { + final int mapIndex = findInMap(indexColumnVector, i, mapV, 0); + if (mapIndex == -1) { + outV.isNull[i] = true; + outV.noNulls = false; + } else { + outV.setElement(i, mapIndex, valuesV); + } + } + } + } else /* there are NULLs in the inputColVector */ { + + /* + * Do careful maintenance of the outV.noNulls flag. 
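+       * A NULL key (index) row simply produces a NULL output row.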
+ */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + if (!indexIsNull[i]) { + final int mapIndex = findInMap(indexColumnVector, i, mapV, 0); + if (mapIndex == -1) { + outV.isNull[i] = true; + outV.noNulls = false; + } else { + outV.isNull[i] = false; + outV.setElement(i, mapIndex, valuesV); + } + } else { + outputIsNull[i] = true; + outV.noNulls = false; + } + } + } else { + for(int i = 0; i != n; i++) { + if (!indexIsNull[i]) { + final int mapIndex = findInMap(indexColumnVector, i, mapV, 0); + if (mapIndex == -1) { + outV.isNull[i] = true; + outV.noNulls = false; + } else { + outV.isNull[i] = false; + outV.setElement(i, mapIndex, valuesV); + } + } else { + outputIsNull[i] = true; + outV.noNulls = false; + } + } + } + } + } else { + outputIsNull[0] = true; outV.noNulls = false; outV.isRepeating = true; + } + return; + } + + // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + /* + * Individual row processing for INDEX vectors and LIST vectors. + */ + final boolean listNoNulls = mapV.noNulls; + + if (indexColumnVector.noNulls) { + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outV.noNulls) { + for (int j = 0; j != n; j++) { + final int i = sel[j]; + + if (listNoNulls || !mapIsNull[i]) { + final int mapIndex = findInMap(indexColumnVector, i, mapV, i); + if (mapIndex == -1) { + outV.isNull[i] = true; + outV.noNulls = false; + } else { + outV.isNull[i] = false; + outV.setElement(i, mapIndex, valuesV); + } + } else { + outputIsNull[i] = true; + outV.noNulls = false; + } + } + } else { + for (int j = 0; j != n; j++) { + final int i = sel[j]; + if (listNoNulls || !mapIsNull[i]) { + final int mapIndex = findInMap(indexColumnVector, i, mapV, i); + if (mapIndex == -1) { + outV.isNull[i] = true; + outV.noNulls = false; + } else { + outV.setElement(i, mapIndex, valuesV); + } + } else { + outputIsNull[i] = true; + outV.noNulls = false; + } + } + } } else { - mapValueIndex = getMapValueIndex(mapV, batch); - if (indexColumnVector.isRepeating) { - // the key is not found in MapColumnVector, set the output as null ColumnVector - if (mapValueIndex[0] == -1) { - outV.isNull[0] = true; - outV.noNulls = false; + if (!outV.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outV.noNulls = true; + } + for (int i = 0; i != n; i++) { + if (listNoNulls || !mapIsNull[i]) { + final int mapIndex = findInMap(indexColumnVector, i, mapV, i); + if (mapIndex == -1) { + outV.isNull[i] = true; + outV.noNulls = false; + } else { + outV.setElement(i, mapIndex, valuesV); + } } else { - // the key is found in MapColumnVector, set the value - outV.isNull[0] = false; - outV.setElement(0, (int) (mapV.offsets[0] + mapValueIndex[0]), mapV.values); + outputIsNull[i] = true; + outV.noNulls = false; } - outV.isRepeating = true; - } else { - setUnRepeatingOutVector(batch, mapV, outV, mapValueIndex); } } - } else { - mapValueIndex = getMapValueIndex(mapV, batch); - setUnRepeatingOutVector(batch, mapV, outV, mapValueIndex); - } - } + } else /* there are NULLs in the inputColVector */ { - /** - * Set the output based on the index array of MapColumnVector. - */ - private void setUnRepeatingOutVector(VectorizedRowBatch batch, MapColumnVector mapV, - ColumnVector outV, int[] mapValueIndex) { - for (int i = 0; i < batch.size; i++) { - int j = (batch.selectedInUse) ? 
batch.selected[i] : i; - if (mapV.isNull[j] || mapValueIndex[j] == -1) { - outV.isNull[j] = true; - outV.noNulls = false; + /* + * Do careful maintenance of the outV.noNulls flag. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + if (!indexIsNull[i]) { + if (listNoNulls || !mapIsNull[i]) { + final int mapIndex = findInMap(indexColumnVector, i, mapV, i); + if (mapIndex == -1) { + outV.isNull[i] = true; + outV.noNulls = false; + } else { + outV.isNull[i] = false; + outV.setElement(i, mapIndex, valuesV); + } + } else { + outputIsNull[i] = true; + outV.noNulls = false; + } + } else { + outputIsNull[i] = true; + outV.noNulls = false; + } + } } else { - outV.isNull[j] = false; - outV.setElement(j, (int) (mapV.offsets[j] + mapValueIndex[j]), mapV.values); + for(int i = 0; i != n; i++) { + if (!indexIsNull[i]) { + if (listNoNulls || !mapIsNull[i]) { + final int mapIndex = findInMap(indexColumnVector, i, mapV, i); + if (mapIndex == -1) { + outV.isNull[i] = true; + outV.noNulls = false; + } else { + outV.isNull[i] = false; + outV.setElement(i, mapIndex, valuesV); + } + } else { + outputIsNull[i] = true; + outV.noNulls = false; + } + } else { + outputIsNull[i] = true; + outV.noNulls = false; + } + } } } - outV.isRepeating = false; } - @Override - protected Object getCurrentKey(int index) { - return getKeyByIndex(indexColumnVector, index); + public int findInMap(ColumnVector indexColumnVector, int indexBatchIndex, + MapColumnVector mapColumnVector, int mapBatchIndex) { + throw new RuntimeException("Not implemented"); } public int getMapColumnNum() { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseScalar.java index 72662e0..e7bb4d1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseScalar.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -27,7 +29,7 @@ * Superclass to support vectorized functions that take a scalar as key of Map * and return the value of Map. */ -public abstract class VectorUDFMapIndexBaseScalar extends VectorUDFMapIndexBase { +public abstract class VectorUDFMapIndexBaseScalar extends VectorExpression { private static final long serialVersionUID = 1L; @@ -44,50 +46,144 @@ public VectorUDFMapIndexBaseScalar(int mapColumnNum, int outputColumnNum) { @Override public void evaluate(VectorizedRowBatch batch) throws HiveException { + + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } ColumnVector outV = batch.cols[outputColumnNum]; MapColumnVector mapV = (MapColumnVector) batch.cols[mapColumnNum]; + ColumnVector valuesV = mapV.values; + + int[] sel = batch.selected; + boolean[] mapIsNull = mapV.isNull; + boolean[] outputIsNull = outV.isNull; + + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; /* * Do careful maintenance of the outputColVector.noNulls flag. 
*/ - int[] mapValueIndex; if (mapV.isRepeating) { - if (mapV.isNull[0]) { - outV.isNull[0] = true; - outV.noNulls = false; - } else { - mapValueIndex = getMapValueIndex(mapV, batch); - if (mapValueIndex[0] == -1) { - // the key is not found in MapColumnVector, set the output as null ColumnVector + if (mapV.noNulls || !mapIsNull[0]) { + final int repeatedMapIndex = findScalarInMap(mapV, 0); + if (repeatedMapIndex == -1) { outV.isNull[0] = true; outV.noNulls = false; } else { - // the key is found in MapColumnVector, set the value - outV.setElement(0, (int) (mapV.offsets[0] + mapValueIndex[0]), mapV.values); + outV.isNull[0] = false; + outV.setElement(0, repeatedMapIndex, valuesV); } + } else { + outV.isNull[0] = true; + outV.noNulls = false; } outV.isRepeating = true; - } else { - mapValueIndex = getMapValueIndex(mapV, batch); - for (int i = 0; i < batch.size; i++) { - int j = (batch.selectedInUse) ? batch.selected[i] : i; - if (mapV.isNull[j] || mapValueIndex[j] == -1) { - outV.isNull[j] = true; - outV.noNulls = false; + return; + } + + /* + * Individual row processing for LIST vector with scalar constant INDEX value. + */ + if (mapV.noNulls) { + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outV.noNulls) { + for (int j = 0; j < n; j++) { + final int i = sel[j]; + final int mapIndex = findScalarInMap(mapV, i); + if (mapIndex == -1) { + outV.isNull[i] = true; + outV.noNulls = false; + } else { + outV.isNull[i] = false; + outV.setElement(i, mapIndex, valuesV); + } + } } else { - outV.isNull[j] = false; - outV.setElement(j, (int) (mapV.offsets[j] + mapValueIndex[j]), mapV.values); + for (int j = 0; j < n; j++) { + final int i = sel[j]; + final int mapIndex = findScalarInMap(mapV, i); + if (mapIndex == -1) { + outV.isNull[i] = true; + outV.noNulls = false; + } else { + outV.setElement(i, mapIndex, valuesV); + } + } + } + } else { + if (!outV.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. 
+ Arrays.fill(outV.isNull, false); + outV.noNulls = true; + } + for (int i = 0; i < n; i++) { + final long longListLength = mapV.lengths[i]; + final int mapIndex = findScalarInMap(mapV, i); + if (mapIndex == -1) { + outV.isNull[i] = true; + outV.noNulls = false; + } else { + outV.setElement(i, mapIndex, valuesV); + } + } + } + } else /* there are NULLs in the MAP */ { + + if (batch.selectedInUse) { + for (int j=0; j != n; j++) { + int i = sel[j]; + if (!mapIsNull[i]) { + final int mapIndex = findScalarInMap(mapV, i); + if (mapIndex == -1) { + outV.isNull[i] = true; + outV.noNulls = false; + } else { + outV.isNull[i] = false; + outV.setElement(i, mapIndex, valuesV); + } + } else { + outputIsNull[i] = true; + outV.noNulls = false; + } + } + } else { + for (int i = 0; i != n; i++) { + if (!mapIsNull[i]) { + final int mapIndex = findScalarInMap(mapV, i); + if (mapIndex == -1) { + outV.isNull[i] = true; + outV.noNulls = false; + } else { + outV.isNull[i] = false; + outV.setElement(i, mapIndex, valuesV); + } + } else { + outputIsNull[i] = true; + outV.noNulls = false; + } } } - outV.isRepeating = false; } } + public int findScalarInMap(MapColumnVector mapColumnVector, int mapBatchIndex) { + throw new RuntimeException("Not implemented"); + } + public int getMapColumnNum() { return mapColumnNum; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDecimalCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDecimalCol.java new file mode 100644 index 0000000..d700799 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDecimalCol.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; + +/** + * Returns value of Map. 
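+ * The lookup key comes from a DECIMAL column; each row's key is compared against the keys of
+ * that row's map.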
+ * Extends {@link VectorUDFMapIndexBaseCol} + */ +public class VectorUDFMapIndexDecimalCol extends VectorUDFMapIndexBaseCol { + + public VectorUDFMapIndexDecimalCol() { + super(); + } + + public VectorUDFMapIndexDecimalCol(int mapColumnNum, int indexColumnNum, int outputColumnNum) { + super(mapColumnNum, indexColumnNum, outputColumnNum); + } + + @Override + public String vectorExpressionParameters() { + return getColumnParamString(0, getMapColumnNum()) + ", key: " + + getColumnParamString(1, getIndexColumnNum()); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.MAP, + VectorExpressionDescriptor.ArgumentType.DECIMAL) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } + + @Override + public int findInMap(ColumnVector indexColumnVector, int indexBatchIndex, + MapColumnVector mapColumnVector, int mapBatchIndex) { + final int offset = (int) mapColumnVector.offsets[mapBatchIndex]; + final int count = (int) mapColumnVector.lengths[mapBatchIndex]; + HiveDecimalWritable[] keys = ((DecimalColumnVector) mapColumnVector.keys).vector; + final HiveDecimalWritable index = + ((DecimalColumnVector) indexColumnVector).vector[indexBatchIndex]; + for (int i = 0; i < count; i++) { + if (index.compareTo(keys[offset + i]) == 0) { + return offset + i; + } + } + return -1; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDecimalScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDecimalScalar.java new file mode 100644 index 0000000..7bdc555 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDecimalScalar.java @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; + +/** + * Returns value of Map. 
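+ * The lookup key is a constant DECIMAL scalar supplied when the expression is constructed.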
+ * Extends {@link VectorUDFMapIndexBaseScalar} + */ +public class VectorUDFMapIndexDecimalScalar extends VectorUDFMapIndexBaseScalar { + + private static final long serialVersionUID = 1L; + + private HiveDecimal key; + private double doubleKey; + + public VectorUDFMapIndexDecimalScalar() { + super(); + } + + public VectorUDFMapIndexDecimalScalar(int mapColumnNum, HiveDecimal key, int outputColumnNum) { + super(mapColumnNum, outputColumnNum); + this.key = key; + doubleKey = key.doubleValue(); + } + + @Override + public String vectorExpressionParameters() { + return getColumnParamString(0, getMapColumnNum()) + ", key: " + key; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.MAP, + VectorExpressionDescriptor.ArgumentType.DECIMAL) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } + + @Override + public int findScalarInMap(MapColumnVector mapColumnVector, int mapBatchIndex) { + final int offset = (int) mapColumnVector.offsets[mapBatchIndex]; + final int count = (int) mapColumnVector.lengths[mapBatchIndex]; + + ColumnVector keys = mapColumnVector.keys; + if (keys instanceof DecimalColumnVector) { + HiveDecimalWritable[] decimalKeyVector = ((DecimalColumnVector) keys).vector; + for (int i = 0; i < count; i++) { + if (decimalKeyVector[offset + i].compareTo(key) == 0) { + return offset + i; + } + } + } else { + + // For some strange reason we receive a double column vector... + // The way we do VectorExpressionDescriptor may be inadequate in this case... 
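+      // Fall back to comparing the key's pre-computed double value (doubleKey) against the
+      // DOUBLE key vector.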
+ double[] doubleKeyVector = ((DoubleColumnVector) keys).vector; + for (int i = 0; i < count; i++) { + if (doubleKeyVector[offset + i] == doubleKey) { + return offset + i; + } + } + } + return -1; + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDoubleCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDoubleCol.java index 48c64a7..ca05269 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDoubleCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDoubleCol.java @@ -20,6 +20,7 @@ import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; /** @@ -57,7 +58,17 @@ public String vectorExpressionParameters() { } @Override - protected Object getKeyByIndex(ColumnVector cv, int index) { - return ((DoubleColumnVector) cv).vector[index]; + public int findInMap(ColumnVector indexColumnVector, int indexBatchIndex, + MapColumnVector mapColumnVector, int mapBatchIndex) { + final int offset = (int) mapColumnVector.offsets[mapBatchIndex]; + final int count = (int) mapColumnVector.lengths[mapBatchIndex]; + double[] keys = ((DoubleColumnVector) mapColumnVector.keys).vector; + final double index = ((DoubleColumnVector) indexColumnVector).vector[indexBatchIndex]; + for (int i = 0; i < count; i++) { + if (index == keys[offset + i]) { + return offset + i; + } + } + return -1; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDoubleScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDoubleScalar.java index d624176..5c064f6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDoubleScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDoubleScalar.java @@ -18,9 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; -import org.apache.hadoop.hive.common.type.HiveDecimal; -import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; /** @@ -29,13 +28,15 @@ */ public class VectorUDFMapIndexDoubleScalar extends VectorUDFMapIndexBaseScalar { - private HiveDecimal key; + private static final long serialVersionUID = 1L; + + private double key; public VectorUDFMapIndexDoubleScalar() { super(); } - public VectorUDFMapIndexDoubleScalar(int mapColumnNum, HiveDecimal key, int outputColumnNum) { + public VectorUDFMapIndexDoubleScalar(int mapColumnNum, double key, int outputColumnNum) { super(mapColumnNum, outputColumnNum); this.key = key; } @@ -53,24 +54,23 @@ public String vectorExpressionParameters() { .setNumArguments(2) .setArgumentTypes( VectorExpressionDescriptor.ArgumentType.MAP, - VectorExpressionDescriptor.ArgumentType.DECIMAL) + VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY) .setInputExpressionTypes( VectorExpressionDescriptor.InputExpressionType.COLUMN, VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); } @Override - protected Object getKeyByIndex(ColumnVector cv, int index) { - return ((DoubleColumnVector) cv).vector[index]; - } - - @Override - public Object 
getCurrentKey(int index) { - return key; + public int findScalarInMap(MapColumnVector mapColumnVector, int mapBatchIndex) { + final int offset = (int) mapColumnVector.offsets[mapBatchIndex]; + final int count = (int) mapColumnVector.lengths[mapBatchIndex]; + double[] keys = ((DoubleColumnVector) mapColumnVector.keys).vector; + for (int i = 0; i < count; i++) { + if (key == keys[offset + i]) { + return offset + i; + } + } + return -1; } - @Override - protected boolean compareKeyInternal(Object columnKey, Object otherKey) { - return otherKey.equals(((HiveDecimal) columnKey).doubleValue()); - } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexLongCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexLongCol.java index 5094d0b..482d83f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexLongCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexLongCol.java @@ -20,6 +20,7 @@ import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; /** @@ -57,7 +58,17 @@ public String vectorExpressionParameters() { } @Override - protected Object getKeyByIndex(ColumnVector cv, int index) { - return ((LongColumnVector) cv).vector[index]; + public int findInMap(ColumnVector indexColumnVector, int indexBatchIndex, + MapColumnVector mapColumnVector, int mapBatchIndex) { + final int offset = (int) mapColumnVector.offsets[mapBatchIndex]; + final int count = (int) mapColumnVector.lengths[mapBatchIndex]; + long[] keys = ((LongColumnVector) mapColumnVector.keys).vector; + final long index = ((LongColumnVector) indexColumnVector).vector[indexBatchIndex]; + for (int i = 0; i < count; i++) { + if (index == keys[offset + i]) { + return offset + i; + } + } + return -1; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexLongScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexLongScalar.java index f7433e6..e604503 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexLongScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexLongScalar.java @@ -18,8 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; -import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; /** @@ -28,6 +28,8 @@ */ public class VectorUDFMapIndexLongScalar extends VectorUDFMapIndexBaseScalar { + private static final long serialVersionUID = 1L; + private long key; public VectorUDFMapIndexLongScalar() { @@ -59,12 +61,15 @@ public String vectorExpressionParameters() { } @Override - protected Object getKeyByIndex(ColumnVector cv, int index) { - return ((LongColumnVector) cv).vector[index]; - } - - @Override - public Object getCurrentKey(int index) { - return key; + public int findScalarInMap(MapColumnVector mapColumnVector, int mapBatchIndex) { + final int offset = (int) mapColumnVector.offsets[mapBatchIndex]; + final int count = (int) mapColumnVector.lengths[mapBatchIndex]; + long[] keys = ((LongColumnVector) mapColumnVector.keys).vector; + for (int i 
= 0; i < count; i++) { + if (key == keys[offset + i]) { + return offset + i; + } + } + return -1; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexStringCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexStringCol.java index 4eefc6f..52c0909 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexStringCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexStringCol.java @@ -21,6 +21,8 @@ import org.apache.commons.lang.ArrayUtils; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import java.util.Arrays; @@ -60,14 +62,25 @@ public String vectorExpressionParameters() { } @Override - protected Object getKeyByIndex(ColumnVector cv, int index) { - BytesColumnVector bytesCV = (BytesColumnVector) cv; - return ArrayUtils.subarray(bytesCV.vector[index], bytesCV.start[index], - bytesCV.start[index] + bytesCV.length[index]); - } - - @Override - protected boolean compareKeyInternal(Object columnKey, Object otherKey) { - return Arrays.equals((byte[])columnKey, (byte[]) otherKey); + public int findInMap(ColumnVector indexColumnVector, int indexBatchIndex, + MapColumnVector mapColumnVector, int mapBatchIndex) { + final int offset = (int) mapColumnVector.offsets[mapBatchIndex]; + final int count = (int) mapColumnVector.lengths[mapBatchIndex]; + BytesColumnVector keyColVector = (BytesColumnVector) mapColumnVector.keys; + byte[][] keyVector = keyColVector.vector; + int[] keyStart = keyColVector.start; + int[] keyLength = keyColVector.length; + BytesColumnVector indexColVector = (BytesColumnVector) indexColumnVector; + byte[] indexBytes = indexColVector.vector[indexBatchIndex]; + int indexStart = indexColVector.start[indexBatchIndex]; + int indexLength = indexColVector.length[indexBatchIndex]; + for (int i = 0; i < count; i++) { + final int keyOffset = offset + i; + if (StringExpr.equal(indexBytes, indexStart, indexLength, + keyVector[keyOffset], keyStart[keyOffset], keyLength[keyOffset])) { + return offset + i; + } + } + return -1; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexStringScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexStringScalar.java index b08cd3a..eb13414 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexStringScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexStringScalar.java @@ -21,6 +21,7 @@ import org.apache.commons.lang.ArrayUtils; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import java.util.Arrays; @@ -62,19 +63,20 @@ public String vectorExpressionParameters() { } @Override - protected Object getKeyByIndex(ColumnVector cv, int index) { - BytesColumnVector bytesCV = (BytesColumnVector) cv; - return ArrayUtils.subarray(bytesCV.vector[index], bytesCV.start[index], - bytesCV.start[index] + bytesCV.length[index]); - } - - @Override - public Object getCurrentKey(int index) { - return key; 
- } - - @Override - protected boolean compareKeyInternal(Object columnKey, Object otherKey) { - return Arrays.equals((byte[])columnKey, (byte[]) otherKey); + public int findScalarInMap(MapColumnVector mapColumnVector, int mapBatchIndex) { + final int offset = (int) mapColumnVector.offsets[mapBatchIndex]; + final int count = (int) mapColumnVector.lengths[mapBatchIndex]; + BytesColumnVector keyColVector = (BytesColumnVector) mapColumnVector.keys; + byte[][] keyVector = keyColVector.vector; + int[] keyStart = keyColVector.start; + int[] keyLength = keyColVector.length; + for (int i = 0; i < count; i++) { + final int keyOffset = offset + i; + if (StringExpr.equal(key, 0, key.length, + keyVector[keyOffset], keyStart[keyOffset], keyLength[keyOffset])) { + return offset + i; + } + } + return -1; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIndex.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIndex.java index 4c0cb2b..13cc284 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIndex.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIndex.java @@ -25,6 +25,8 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; import org.apache.hadoop.hive.ql.exec.vector.expressions.ListIndexColColumn; import org.apache.hadoop.hive.ql.exec.vector.expressions.ListIndexColScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFMapIndexDecimalCol; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFMapIndexDecimalScalar; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFMapIndexDoubleCol; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFMapIndexDoubleScalar; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFMapIndexLongCol; @@ -48,8 +50,9 @@ @Description(name = "index", value = "_FUNC_(a, n) - Returns the n-th element of a ") @VectorizedExpressions({ListIndexColScalar.class, ListIndexColColumn.class, VectorUDFMapIndexStringScalar.class, VectorUDFMapIndexLongScalar.class, - VectorUDFMapIndexDoubleScalar.class, VectorUDFMapIndexStringCol.class, - VectorUDFMapIndexLongCol.class, VectorUDFMapIndexDoubleCol.class}) + VectorUDFMapIndexDoubleScalar.class, VectorUDFMapIndexDecimalScalar.class, + VectorUDFMapIndexStringCol.class, VectorUDFMapIndexLongCol.class, + VectorUDFMapIndexDoubleCol.class, VectorUDFMapIndexDecimalCol.class}) public class GenericUDFIndex extends GenericUDF { private transient MapObjectInspector mapOI; diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java index dfbf9d4..af73ee6 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java @@ -22,8 +22,10 @@ import java.text.SimpleDateFormat; import java.text.ParseException; import java.util.ArrayList; +import java.util.HashMap; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Random; import java.util.Set; @@ -925,7 +927,60 @@ public static Object randomStringFamily(Random random, TypeInfo typeInfo, { List valueList = generationSpec.getValueList(); final int valueCount = valueList.size(); - object = valueList.get(r.nextInt(valueCount)); + + TypeInfo typeInfo = generationSpec.getTypeInfo(); + Category category = typeInfo.getCategory(); + switch (category) { + case PRIMITIVE: + case STRUCT: + object = 
valueList.get(r.nextInt(valueCount)); + break; + case LIST: + { + final int elementCount = r.nextInt(valueCount); + + ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo; + TypeInfo elementTypeInfo = listTypeInfo.getListElementTypeInfo(); + final ObjectInspector elementObjectInspector = + TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo( + elementTypeInfo); + List list = new ArrayList(elementCount); + for (int i = 0; i < elementCount; i++) { + Object elementWritable = + randomWritable(elementTypeInfo, elementObjectInspector, + allowNull); + list.add(elementWritable); + } + object = list; + } + break; + case MAP: + { + final int elementCount = r.nextInt(valueCount); + + MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo; + TypeInfo valueTypeInfo = mapTypeInfo.getMapValueTypeInfo(); + final ObjectInspector valueObjectInspector = + TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo( + valueTypeInfo); + Map map = new HashMap(elementCount); + for (int i = 0; i < elementCount; i++) { + Object key = valueList.get(r.nextInt(valueCount)); + Object valueWritable = + randomWritable(valueTypeInfo, valueObjectInspector, + allowNull); + if (!map.containsKey(key)) { + map.put( + key, + valueWritable); + } + } + object = map; + } + break; + default: + throw new RuntimeException("Unexpected category " + category); + } } break; default: diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorBetweenIn.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorBetweenIn.java index 3f1a137..5b69bdf 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorBetweenIn.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorBetweenIn.java @@ -342,9 +342,9 @@ private boolean doBetweenInVariation(Random random, String typeName, List sortedList = new ArrayList(valueCount); sortedList.addAll(valueList); - Object object = valueList.get(0); + Object exampleObject = valueList.get(0); WritableComparator writableComparator = - WritableComparator.get((Class) object.getClass()); + WritableComparator.get((Class) exampleObject.getClass()); sortedList.sort(writableComparator); final boolean isInvert; diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorCoalesceElt.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorCoalesceElt.java new file mode 100644 index 0000000..f41eeb6 --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorCoalesceElt.java @@ -0,0 +1,523 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Random; +import java.util.Set; +import java.util.stream.IntStream; + +import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; +import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomBatchSource; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource.SupportedTypes; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource.GenerationSpec; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.TestVectorArithmetic.ColumnScalarMode; +import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.VirtualColumn; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFCoalesce; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFElt; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.io.HiveCharWritable; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; +import org.apache.hadoop.io.BooleanWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.WritableComparator; +import org.apache.hadoop.io.WritableComparable; + +import junit.framework.Assert; + 
+import org.junit.Ignore; +import org.junit.Test; + +public class TestVectorCoalesceElt { + + @Test + public void testCoalesce() throws Exception { + Random random = new Random(5371); + + doCoalesceElt(random, /* isCoalesce */ true, false); + } + + @Test + public void testElt() throws Exception { + Random random = new Random(5371); + + // Grind through a few more index values... + for (int i = 0; i < 4; i++) { + doCoalesceElt(random, /* isCoalesce */ false, false); + doCoalesceElt(random, /* isCoalesce */ false, true); + } + } + + public enum CoalesceEltTestMode { + ROW_MODE, + ADAPTOR, + VECTOR_EXPRESSION; + + static final int count = values().length; + } + + private void doCoalesceElt(Random random, boolean isCoalesce, boolean isEltIndexConst) + throws Exception { + + doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 2, + /* constantColumns */ null, /* nullConstantColumns */ null, /* allowNulls */ true); + doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 2, + /* constantColumns */ null, /* nullConstantColumns */ null, /* allowNulls */ false); + + doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 3, + /* constantColumns */ null, /* nullConstantColumns */ null, /* allowNulls */ true); + doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 3, + new int[] { 0 }, /* nullConstantColumns */ null, /* allowNulls */ true); + doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 3, + new int[] { 0 }, /* nullConstantColumns */ new int[] { 0 }, /* allowNulls */ true); + doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 3, + new int[] { 1 }, /* nullConstantColumns */ null, /* allowNulls */ true); + doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 3, + new int[] { 1 }, /* nullConstantColumns */ new int[] { 1 }, /* allowNulls */ true); + doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 3, + new int[] { 0, 2 }, /* nullConstantColumns */ null, /* allowNulls */ true); + doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 3, + new int[] { 0, 2 }, /* nullConstantColumns */ new int[] { 0 }, /* allowNulls */ true); + doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 3, + new int[] { 0, 2 }, /* nullConstantColumns */ new int[] { 0, 2 }, /* allowNulls */ false); + + doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 4, + /* constantColumns */ null, /* nullConstantColumns */ null, /* allowNulls */ true); + doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 4, + /* constantColumns */ null, /* nullConstantColumns */ null, /* allowNulls */ false); + doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 4, + new int[] { 0, 1, 2 }, /* nullConstantColumns */ new int[] { 0, 1, 2 }, /* allowNulls */ true); + doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 4, + new int[] { 0, 1, 2 }, /* nullConstantColumns */ new int[] { 0, 1, 2 }, /* allowNulls */ false); + } + + private boolean contains(int[] columns, int column) { + if (columns == null) { + return false; + } + for (int i = 0; i < columns.length; i++) { + if (columns[i] == column) { + return true; + } + } + return false; + } + + private boolean doCoalesceOnRandomDataType(Random random, + boolean isCoalesce, boolean isEltIndexConst, int 
columnCount, + int[] constantColumns, int[] nullConstantColumns, boolean allowNulls) + throws Exception { + + String typeName; + if (isCoalesce) { + typeName = + VectorRandomRowSource.getRandomTypeName( + random, SupportedTypes.PRIMITIVES, /* allowedTypeNameSet */ null); + typeName = + VectorRandomRowSource.getDecoratedTypeName( + random, typeName, SupportedTypes.PRIMITIVES, /* allowedTypeNameSet */ null, + /* depth */ 0, /* maxDepth */ 2); + } else { + // ELT only chooses between STRINGs. + typeName = "string"; + } + + TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName); + + //---------------------------------------------------------------------------------------------- + + final TypeInfo intTypeInfo; + ObjectInspector intObjectInspector; + if (isCoalesce) { + intTypeInfo = null; + intObjectInspector = null; + } else { + intTypeInfo = TypeInfoFactory.intTypeInfo; + intObjectInspector = + TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo( + intTypeInfo); + } + + ObjectInspector objectInspector = + TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo( + typeInfo); + + //---------------------------------------------------------------------------------------------- + + List generationSpecList = new ArrayList(); + List explicitDataTypePhysicalVariationList = + new ArrayList(); + List columns = new ArrayList(); + List children = new ArrayList(); + + int columnNum = 1; + if (!isCoalesce) { + + List intValueList = new ArrayList(); + for (int i = -1; i < columnCount + 2; i++) { + intValueList.add(i); + } + final int intValueListCount = intValueList.size(); + ExprNodeDesc colExpr; + if (!isEltIndexConst) { + generationSpecList.add( + GenerationSpec.createValueList(intTypeInfo, intValueList)); + explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE); + String columnName = "col" + columnNum++; + columns.add(columnName); + colExpr = new ExprNodeColumnDesc(intTypeInfo, columnName, "table", false); + } else { + final Object scalarObject; + if (random.nextInt(10) != 0) { + scalarObject = intValueList.get(random.nextInt(intValueListCount)); + } else { + scalarObject = null; + } + colExpr = new ExprNodeConstantDesc(intTypeInfo, scalarObject); + } + children.add(colExpr); + } + for (int c = 0; c < columnCount; c++) { + ExprNodeDesc colExpr; + if (!contains(constantColumns, c)) { + + generationSpecList.add( + GenerationSpec.createSameType(typeInfo)); + explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE); + String columnName = "col" + columnNum++; + columns.add(columnName); + colExpr = new ExprNodeColumnDesc(typeInfo, columnName, "table", false); + } else { + final Object scalarObject; + if (!contains(nullConstantColumns, c)) { + scalarObject = + VectorRandomRowSource.randomPrimitiveObject( + random, (PrimitiveTypeInfo) typeInfo); + } else { + scalarObject = null; + } + colExpr = new ExprNodeConstantDesc(typeInfo, scalarObject); + } + children.add(colExpr); + } + + VectorRandomRowSource rowSource = new VectorRandomRowSource(); + + rowSource.initGenerationSpecSchema( + random, generationSpecList, /* maxComplexDepth */ 0, /* allowNull */ allowNulls, + explicitDataTypePhysicalVariationList); + + String[] columnNames = columns.toArray(new String[0]); + + Object[][] randomRows = rowSource.randomRows(100000); + + VectorRandomBatchSource batchSource = + VectorRandomBatchSource.createInterestingBatches( + random, + rowSource, + randomRows, + null); + + final GenericUDF udf = + (isCoalesce ?
new GenericUDFCoalesce() : new GenericUDFElt()); + + final int start = isCoalesce ? 0 : 1; + final int end = start + columnCount; + ObjectInspector[] argumentOIs = + new ObjectInspector[end]; + if (!isCoalesce) { + argumentOIs[0] = intObjectInspector; + } + for (int i = start; i < end; i++) { + argumentOIs[i] = objectInspector; + } + final ObjectInspector outputObjectInspector = udf.initialize(argumentOIs); + + TypeInfo outputTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(outputObjectInspector); + + ExprNodeGenericFuncDesc exprDesc = + new ExprNodeGenericFuncDesc(typeInfo, udf, children); + + final int rowCount = randomRows.length; + Object[][] resultObjectsArray = new Object[CoalesceEltTestMode.count][]; + for (int i = 0; i < CoalesceEltTestMode.count; i++) { + + Object[] resultObjects = new Object[rowCount]; + resultObjectsArray[i] = resultObjects; + + CoalesceEltTestMode coalesceEltTestMode = CoalesceEltTestMode.values()[i]; + switch (coalesceEltTestMode) { + case ROW_MODE: + if (!doRowCastTest( + typeInfo, + columns, + children, + udf, exprDesc, + randomRows, + rowSource.rowStructObjectInspector(), + exprDesc.getWritableObjectInspector(), + resultObjects)) { + return false; + } + break; + case ADAPTOR: + case VECTOR_EXPRESSION: + if (!doVectorCastTest( + typeInfo, + columns, + columnNames, + rowSource.typeInfos(), + rowSource.dataTypePhysicalVariations(), + children, + udf, exprDesc, + coalesceEltTestMode, + batchSource, + exprDesc.getWritableObjectInspector(), + outputTypeInfo, + resultObjects)) { + return false; + } + break; + default: + throw new RuntimeException("Unexpected IF statement test mode " + coalesceEltTestMode); + } + } + + for (int i = 0; i < rowCount; i++) { + // Row-mode is the expected value. + Object expectedResult = resultObjectsArray[0][i]; + + for (int v = 1; v < CoalesceEltTestMode.count; v++) { + Object vectorResult = resultObjectsArray[v][i]; + CoalesceEltTestMode coalesceEltTestMode = CoalesceEltTestMode.values()[v]; + if (expectedResult == null || vectorResult == null) { + if (expectedResult != null || vectorResult != null) { + Assert.fail( + "Row " + i + + " sourceTypeName " + typeName + + " " + coalesceEltTestMode + + " result is NULL " + (vectorResult == null ? "YES" : "NO result " + vectorResult.toString()) + + " does not match row-mode expected result is NULL " + + (expectedResult == null ? 
"YES" : "NO result " + expectedResult.toString()) + + " row values " + Arrays.toString(randomRows[i]) + + " exprDesc " + exprDesc.toString()); + } + } else { + + if (!expectedResult.equals(vectorResult)) { + Assert.fail( + "Row " + i + + " sourceTypeName " + typeName + + " " + coalesceEltTestMode + + " result " + vectorResult.toString() + + " (" + vectorResult.getClass().getSimpleName() + ")" + + " does not match row-mode expected result " + expectedResult.toString() + + " (" + expectedResult.getClass().getSimpleName() + ")" + + " row values " + Arrays.toString(randomRows[i]) + + " exprDesc " + exprDesc.toString()); + } + } + } + } + return true; + } + + private boolean doRowCastTest(TypeInfo typeInfo, + List columns, List children, + GenericUDF udf, ExprNodeGenericFuncDesc exprDesc, + Object[][] randomRows, + ObjectInspector rowInspector, + ObjectInspector objectInspector, + Object[] resultObjects) + throws Exception { + + /* + System.out.println( + "*DEBUG* typeInfo " + typeInfo.toString() + + " targetTypeInfo " + targetTypeInfo + + " coalesceEltTestMode ROW_MODE" + + " exprDesc " + exprDesc.toString()); + */ + + HiveConf hiveConf = new HiveConf(); + ExprNodeEvaluator evaluator = + ExprNodeEvaluatorFactory.get(exprDesc, hiveConf); + try { + evaluator.initialize(rowInspector); + } catch (HiveException e) { + return false; + } + + final int rowCount = randomRows.length; + for (int i = 0; i < rowCount; i++) { + Object[] row = randomRows[i]; + Object result = evaluator.evaluate(row); + Object copyResult = + ObjectInspectorUtils.copyToStandardObject( + result, objectInspector, + ObjectInspectorCopyOption.WRITABLE); + resultObjects[i] = copyResult; + } + + return true; + } + + private void extractResultObjects(VectorizedRowBatch batch, int rowIndex, + VectorExtractRow resultVectorExtractRow, Object[] scrqtchRow, + ObjectInspector objectInspector, Object[] resultObjects) { + + boolean selectedInUse = batch.selectedInUse; + int[] selected = batch.selected; + for (int logicalIndex = 0; logicalIndex < batch.size; logicalIndex++) { + final int batchIndex = (selectedInUse ? 
selected[logicalIndex] : logicalIndex); + resultVectorExtractRow.extractRow(batch, batchIndex, scrqtchRow); + + Object copyResult = + ObjectInspectorUtils.copyToStandardObject( + scrqtchRow[0], objectInspector, ObjectInspectorCopyOption.WRITABLE); + resultObjects[rowIndex++] = copyResult; + } + } + + private boolean doVectorCastTest(TypeInfo typeInfo, + List columns, String[] columnNames, + TypeInfo[] typeInfos, DataTypePhysicalVariation[] dataTypePhysicalVariations, + List children, + GenericUDF udf, ExprNodeGenericFuncDesc exprDesc, + CoalesceEltTestMode coalesceEltTestMode, + VectorRandomBatchSource batchSource, + ObjectInspector objectInspector, + TypeInfo outputTypeInfo, Object[] resultObjects) + throws Exception { + + HiveConf hiveConf = new HiveConf(); + if (coalesceEltTestMode == CoalesceEltTestMode.ADAPTOR) { + hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_TEST_VECTOR_ADAPTOR_OVERRIDE, true); + } + + VectorizationContext vectorizationContext = + new VectorizationContext( + "name", + columns, + Arrays.asList(typeInfos), + Arrays.asList(dataTypePhysicalVariations), + hiveConf); + VectorExpression vectorExpression = + vectorizationContext.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.PROJECTION); + vectorExpression.transientInit(); + + if (coalesceEltTestMode == CoalesceEltTestMode.VECTOR_EXPRESSION && + vectorExpression instanceof VectorUDFAdaptor) { + System.out.println( + "*NO NATIVE VECTOR EXPRESSION* typeInfo " + typeInfo.toString() + + " coalesceEltTestMode " + coalesceEltTestMode + + " vectorExpression " + vectorExpression.toString()); + } + + System.out.println("*VECTOR EXPRESSION* " + vectorExpression.getClass().getSimpleName()); + + /* + System.out.println( + "*DEBUG* typeInfo " + typeInfo.toString() + + " coalesceEltTestMode " + coalesceEltTestMode + + " vectorExpression " + vectorExpression.toString()); + */ + + VectorRandomRowSource rowSource = batchSource.getRowSource(); + VectorizedRowBatchCtx batchContext = + new VectorizedRowBatchCtx( + columnNames, + rowSource.typeInfos(), + rowSource.dataTypePhysicalVariations(), + /* dataColumnNums */ null, + /* partitionColumnCount */ 0, + /* virtualColumnCount */ 0, + /* neededVirtualColumns */ null, + vectorizationContext.getScratchColumnTypeNames(), + vectorizationContext.getScratchDataTypePhysicalVariations()); + + VectorizedRowBatch batch = batchContext.createVectorizedRowBatch(); + + VectorExtractRow resultVectorExtractRow = new VectorExtractRow(); + resultVectorExtractRow.init( + new TypeInfo[] { outputTypeInfo }, new int[] { vectorExpression.getOutputColumnNum() }); + Object[] scrqtchRow = new Object[1]; + + // System.out.println("*VECTOR EXPRESSION* " + vectorExpression.getClass().getSimpleName()); + + /* + System.out.println( + "*DEBUG* typeInfo1 " + typeInfo1.toString() + + " typeInfo2 " + typeInfo2.toString() + + " arithmeticTestMode " + arithmeticTestMode + + " columnScalarMode " + columnScalarMode + + " vectorExpression " + vectorExpression.toString()); + */ + + batchSource.resetBatchIteration(); + int rowIndex = 0; + while (true) { + if (!batchSource.fillNextBatch(batch)) { + break; + } + vectorExpression.evaluate(batch); + extractResultObjects(batch, rowIndex, resultVectorExtractRow, scrqtchRow, + objectInspector, resultObjects); + rowIndex += batch.size; + } + + return true; + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorIndex.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorIndex.java new file mode 100644 index 0000000..867d01d 
--- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorIndex.java @@ -0,0 +1,590 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Random; +import java.util.Set; +import java.util.TreeSet; +import java.util.stream.IntStream; + +import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; +import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomBatchSource; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource.SupportedTypes; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource.GenerationSpec; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.VirtualColumn; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIndex; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.io.HiveCharWritable; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; +import 
org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; +import org.apache.hadoop.io.BooleanWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.WritableComparator; +import org.apache.hadoop.io.WritableComparable; + +import junit.framework.Assert; + +import org.junit.Ignore; +import org.junit.Test; + +public class TestVectorIndex { + + @Test + public void testListIndex() throws Exception { + Random random = new Random(241); + + doIndex(random, /* isList */ true, null, /* isFullElementTypeGamut */ true); + } + + private static TypeInfo[] decimalTypeInfos = new TypeInfo[] { + new DecimalTypeInfo(38, 18), + new DecimalTypeInfo(25, 2), + new DecimalTypeInfo(19, 4), + new DecimalTypeInfo(18, 10), + new DecimalTypeInfo(17, 3), + new DecimalTypeInfo(12, 2), + new DecimalTypeInfo(7, 1) + }; + + @Test + public void testMapIndex() throws Exception { + Random random = new Random(233); + + doIndex(random, /* isList */ false, "int", /* isFullElementTypeGamut */ true); + doIndex(random, /* isList */ false, "bigint", /* isFullElementTypeGamut */ false); + doIndex(random, /* isList */ false, "double", /* isFullElementTypeGamut */ false); + doIndex(random, /* isList */ false, "string", /* isFullElementTypeGamut */ false); + for (TypeInfo typeInfo : decimalTypeInfos) { + doIndex( + random, /* isList */ false, typeInfo.getTypeName(), /* isFullElementTypeGamut */ false); + } + } + + public enum IndexTestMode { + ROW_MODE, + ADAPTOR, + VECTOR_EXPRESSION; + + static final int count = values().length; + } + + private void doIndex(Random random, boolean isList, String keyTypeName, + boolean isFullElementTypeGamut) + throws Exception { + + String oneElementRootTypeName = "bigint"; + doIndexOnRandomDataType(random, isList, keyTypeName, oneElementRootTypeName, + /* allowNulls */ true, /* isScalarIndex */ false); + + doIndexOnRandomDataType(random, isList, keyTypeName, oneElementRootTypeName, + /* allowNulls */ true, /* isScalarIndex */ true); + + doIndexOnRandomDataType(random, isList, keyTypeName, oneElementRootTypeName, + /* allowNulls */ false, /* isScalarIndex */ false); + doIndexOnRandomDataType(random, isList, keyTypeName, oneElementRootTypeName, + /* allowNulls */ false, /* isScalarIndex */ true); + + if (!isFullElementTypeGamut) { + return; + } + + List elementRootTypeNameList = new ArrayList(); + elementRootTypeNameList.add("int"); + elementRootTypeNameList.add("bigint"); + elementRootTypeNameList.add("double"); + elementRootTypeNameList.add("string"); + elementRootTypeNameList.add("char"); + elementRootTypeNameList.add("varchar"); + elementRootTypeNameList.add("date"); + elementRootTypeNameList.add("timestamp"); + elementRootTypeNameList.add("binary"); + 
elementRootTypeNameList.add("decimal"); + elementRootTypeNameList.add("interval_day_time"); + + for (String elementRootTypeName : elementRootTypeNameList) { + doIndexOnRandomDataType(random, isList, keyTypeName, elementRootTypeName, + /* allowNulls */ true, /* isScalarIndex */ false); + } + } + + private boolean doIndexOnRandomDataType(Random random, + boolean isList, String keyTypeName, String elementRootTypeName, + boolean allowNulls, boolean isScalarIndex) + throws Exception { + + String elementTypeName = + VectorRandomRowSource.getDecoratedTypeName( + random, elementRootTypeName, SupportedTypes.ALL, /* allowedTypeNameSet */ null, + /* depth */ 0, /* maxDepth */ 3); + + TypeInfo elementTypeInfo = TypeInfoUtils.getTypeInfoFromTypeString(elementTypeName); + + ObjectInspector elementObjectInspector = + TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo( + elementTypeInfo); + + //---------------------------------------------------------------------------------------------- + + final TypeInfo keyTypeInfo; + if (isList) { + keyTypeInfo = TypeInfoFactory.intTypeInfo; + } else { + keyTypeInfo = + TypeInfoUtils.getTypeInfoFromTypeString(keyTypeName); + } + final ObjectInspector keyObjectInspector = + TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo( + keyTypeInfo); + + Object exampleObject = + (isList ? + ((WritableIntObjectInspector) keyObjectInspector).create(0) : + VectorRandomRowSource.randomWritable( + random, keyTypeInfo, keyObjectInspector, DataTypePhysicalVariation.NONE, + /* allowNull */ false)); + WritableComparator writableComparator = + WritableComparator.get((Class) exampleObject.getClass()); + + final int allKeyCount = 10 + random.nextInt(10); + final int keyCount = 5 + random.nextInt(allKeyCount / 2); + List allKeyList = new ArrayList(allKeyCount); + + Set allKeyTreeSet = new TreeSet(writableComparator); + + int fillAllKeyCount = 0; + while (fillAllKeyCount < allKeyCount) { + Object object; + if (isList) { + WritableIntObjectInspector writableOI = (WritableIntObjectInspector) keyObjectInspector; + int index = random.nextInt(keyCount); + object = writableOI.create(index); + while (allKeyTreeSet.contains(object)) { + index = + (random.nextBoolean() ? + random.nextInt() : + (random.nextBoolean() ? 
-1 : keyCount)); + object = writableOI.create(index); + } + } else { + do { + object = + VectorRandomRowSource.randomWritable( + random, keyTypeInfo, keyObjectInspector, DataTypePhysicalVariation.NONE, + /* allowNull */ false); + } while (allKeyTreeSet.contains(object)); + } + allKeyList.add(object); + allKeyTreeSet.add(object); + fillAllKeyCount++; + } + + List keyList = new ArrayList(); + + Set keyTreeSet = new TreeSet(writableComparator); + + int fillKeyCount = 0; + while (fillKeyCount < keyCount) { + Object newKey = allKeyList.get(random.nextInt(allKeyCount)); + if (keyTreeSet.contains(newKey)) { + continue; + } + keyList.add(newKey); + keyTreeSet.add(newKey); + fillKeyCount++; + } + + //---------------------------------------------------------------------------------------------- + + final TypeInfo typeInfo; + if (isList) { + ListTypeInfo listTypeInfo = new ListTypeInfo(); + listTypeInfo.setListElementTypeInfo(elementTypeInfo); + typeInfo = listTypeInfo; + } else { + MapTypeInfo mapTypeInfo = new MapTypeInfo(); + mapTypeInfo.setMapKeyTypeInfo(keyTypeInfo); + mapTypeInfo.setMapValueTypeInfo(elementTypeInfo); + typeInfo = mapTypeInfo; + } + + final String typeName = typeInfo.getTypeName(); + + final ObjectInspector objectInspector = + TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo( + typeInfo); + + //---------------------------------------------------------------------------------------------- + + GenerationSpec generationSpec = GenerationSpec.createSameType(typeInfo); + + List generationSpecList = new ArrayList(); + List explicitDataTypePhysicalVariationList = + new ArrayList(); + List columns = new ArrayList(); + List children = new ArrayList(); + + int columnNum = 1; + + ExprNodeDesc keyColExpr; + + if (!isScalarIndex) { + generationSpecList.add( + GenerationSpec.createValueList(keyTypeInfo, keyList)); + explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE); + String columnName = "col" + columnNum++; + columns.add(columnName); + keyColExpr = new ExprNodeColumnDesc(keyTypeInfo, columnName, "table", false); + } else { + Object scalarWritable = keyList.get(random.nextInt(keyCount)); + final Object scalarObject = + VectorRandomRowSource.getNonWritableObject( + scalarWritable, keyTypeInfo, keyObjectInspector); + keyColExpr = new ExprNodeConstantDesc(keyTypeInfo, scalarObject); + } + + /* + System.out.println("*DEBUG* typeName " + typeName); + System.out.println("*DEBUG* keyColExpr " + keyColExpr.toString()); + System.out.println("*DEBUG* keyList " + keyList.toString()); + System.out.println("*DEBUG* allKeyList " + allKeyList.toString()); + */ + + generationSpecList.add( + GenerationSpec.createValueList(typeInfo, keyList)); + explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE); + String columnName = "col" + columnNum++; + columns.add(columnName); + + ExprNodeDesc listOrMapColExpr; + listOrMapColExpr = new ExprNodeColumnDesc(typeInfo, columnName, "table", false); + + children.add(listOrMapColExpr); + children.add(keyColExpr); + + VectorRandomRowSource rowSource = new VectorRandomRowSource(); + + rowSource.initGenerationSpecSchema( + random, generationSpecList, /* maxComplexDepth */ 0, /* allowNull */ allowNulls, + explicitDataTypePhysicalVariationList); + + String[] columnNames = columns.toArray(new String[0]); + + Object[][] randomRows = rowSource.randomRows(100000); + + VectorRandomBatchSource batchSource = + VectorRandomBatchSource.createInterestingBatches( + random, + rowSource, + randomRows, + null); + + final GenericUDF udf = new 
GenericUDFIndex(); + + ObjectInspector[] argumentOIs = new ObjectInspector[2]; + argumentOIs[0] = objectInspector; + argumentOIs[1] = keyObjectInspector; + + final ObjectInspector outputObjectInspector = udf.initialize(argumentOIs); + + TypeInfo outputTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(outputObjectInspector); + + ExprNodeGenericFuncDesc exprDesc = + new ExprNodeGenericFuncDesc(elementTypeInfo, udf, children); + + final int rowCount = randomRows.length; + Object[][] resultObjectsArray = new Object[IndexTestMode.count][]; + for (int i = 0; i < IndexTestMode.count; i++) { + + Object[] resultObjects = new Object[rowCount]; + resultObjectsArray[i] = resultObjects; + + IndexTestMode indexTestMode = IndexTestMode.values()[i]; + switch (indexTestMode) { + case ROW_MODE: + if (!doRowCastTest( + typeInfo, + columns, + children, + udf, exprDesc, + randomRows, + rowSource.rowStructObjectInspector(), + elementObjectInspector, + outputTypeInfo, + resultObjects)) { + return false; + } + break; + case ADAPTOR: + case VECTOR_EXPRESSION: + if (!doVectorCastTest( + typeInfo, + columns, + columnNames, + rowSource.typeInfos(), + rowSource.dataTypePhysicalVariations(), + children, + udf, exprDesc, + indexTestMode, + batchSource, + exprDesc.getWritableObjectInspector(), + outputTypeInfo, + resultObjects)) { + return false; + } + break; + default: + throw new RuntimeException("Unexpected index test mode " + indexTestMode); + } + } + + for (int i = 0; i < rowCount; i++) { + // Row-mode is the expected value. + Object expectedResult = resultObjectsArray[0][i]; + + for (int v = 1; v < IndexTestMode.count; v++) { + Object vectorResult = resultObjectsArray[v][i]; + IndexTestMode indexTestMode = IndexTestMode.values()[v]; + if (expectedResult == null || vectorResult == null) { + if (expectedResult != null || vectorResult != null) { + Assert.fail( + "Row " + i + + " sourceTypeName " + typeName + + " " + indexTestMode + + " result is NULL " + (vectorResult == null ? "YES" : "NO result " + vectorResult.toString()) + + " does not match row-mode expected result is NULL " + + (expectedResult == null ?
"YES" : "NO result " + expectedResult.toString()) + + " row values " + Arrays.toString(randomRows[i]) + + " exprDesc " + exprDesc.toString()); + } + } else { + + if (!expectedResult.equals(vectorResult)) { + Assert.fail( + "Row " + i + + " sourceTypeName " + typeName + + " " + indexTestMode + + " result " + vectorResult.toString() + + " (" + vectorResult.getClass().getSimpleName() + ")" + + " does not match row-mode expected result " + expectedResult.toString() + + " (" + expectedResult.getClass().getSimpleName() + ")" + + " row values " + Arrays.toString(randomRows[i]) + + " exprDesc " + exprDesc.toString()); + } + } + } + } + + return true; + } + + private boolean doRowCastTest(TypeInfo typeInfo, + List columns, List children, + GenericUDF udf, ExprNodeGenericFuncDesc exprDesc, + Object[][] randomRows, + ObjectInspector rowInspector, + ObjectInspector elementObjectInspector, + TypeInfo outputTypeInfo, + Object[] resultObjects) + throws Exception { + + /* + System.out.println( + "*DEBUG* typeInfo " + typeInfo.toString() + + " outputTypeInfo " + outputTypeInfo.toString() + + " indexTestMode ROW_MODE" + + " exprDesc " + exprDesc.toString()); + */ + + HiveConf hiveConf = new HiveConf(); + ExprNodeEvaluator evaluator = + ExprNodeEvaluatorFactory.get(exprDesc, hiveConf); + try { + evaluator.initialize(rowInspector); + } catch (HiveException e) { + return false; + } + + final int rowCount = randomRows.length; + for (int i = 0; i < rowCount; i++) { + Object[] row = randomRows[i]; + Object result = evaluator.evaluate(row); + Object copyResult = + ObjectInspectorUtils.copyToStandardObject( + result, elementObjectInspector, + ObjectInspectorCopyOption.WRITABLE); + resultObjects[i] = copyResult; + } + + return true; + } + + private void extractResultObjects(VectorizedRowBatch batch, int rowIndex, + VectorExtractRow resultVectorExtractRow, Object[] scrqtchRow, + ObjectInspector objectInspector, Object[] resultObjects) { + + boolean selectedInUse = batch.selectedInUse; + int[] selected = batch.selected; + for (int logicalIndex = 0; logicalIndex < batch.size; logicalIndex++) { + final int batchIndex = (selectedInUse ?
selected[logicalIndex] : logicalIndex); + resultVectorExtractRow.extractRow(batch, batchIndex, scrqtchRow); + + Object copyResult = + ObjectInspectorUtils.copyToStandardObject( + scrqtchRow[0], objectInspector, ObjectInspectorCopyOption.WRITABLE); + resultObjects[rowIndex++] = copyResult; + } + } + + private boolean doVectorCastTest(TypeInfo typeInfo, + List columns, String[] columnNames, + TypeInfo[] typeInfos, DataTypePhysicalVariation[] dataTypePhysicalVariations, + List children, + GenericUDF udf, ExprNodeGenericFuncDesc exprDesc, + IndexTestMode indexTestMode, + VectorRandomBatchSource batchSource, + ObjectInspector objectInspector, + TypeInfo outputTypeInfo, Object[] resultObjects) + throws Exception { + + HiveConf hiveConf = new HiveConf(); + if (indexTestMode == IndexTestMode.ADAPTOR) { + hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_TEST_VECTOR_ADAPTOR_OVERRIDE, true); + } + + VectorizationContext vectorizationContext = + new VectorizationContext( + "name", + columns, + Arrays.asList(typeInfos), + Arrays.asList(dataTypePhysicalVariations), + hiveConf); + VectorExpression vectorExpression = + vectorizationContext.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.PROJECTION); + vectorExpression.transientInit(); + + if (indexTestMode == IndexTestMode.VECTOR_EXPRESSION && + vectorExpression instanceof VectorUDFAdaptor) { + System.out.println( + "*NO NATIVE VECTOR EXPRESSION* typeInfo " + typeInfo.toString() + + " indexTestMode " + indexTestMode + + " vectorExpression " + vectorExpression.toString()); + } + + System.out.println("*VECTOR EXPRESSION* " + vectorExpression.getClass().getSimpleName()); + + /* + System.out.println( + "*DEBUG* typeInfo " + typeInfo.toString() + + " indexTestMode " + indexTestMode + + " vectorExpression " + vectorExpression.toString()); + */ + + VectorRandomRowSource rowSource = batchSource.getRowSource(); + VectorizedRowBatchCtx batchContext = + new VectorizedRowBatchCtx( + columnNames, + rowSource.typeInfos(), + rowSource.dataTypePhysicalVariations(), + /* dataColumnNums */ null, + /* partitionColumnCount */ 0, + /* virtualColumnCount */ 0, + /* neededVirtualColumns */ null, + vectorizationContext.getScratchColumnTypeNames(), + vectorizationContext.getScratchDataTypePhysicalVariations()); + + VectorizedRowBatch batch = batchContext.createVectorizedRowBatch(); + + VectorExtractRow resultVectorExtractRow = new VectorExtractRow(); + resultVectorExtractRow.init( + new TypeInfo[] { outputTypeInfo }, new int[] { vectorExpression.getOutputColumnNum() }); + Object[] scrqtchRow = new Object[1]; + + /* + System.out.println( + "*DEBUG* typeInfo1 " + typeInfo1.toString() + + " typeInfo2 " + typeInfo2.toString() + + " arithmeticTestMode " + arithmeticTestMode + + " columnScalarMode " + columnScalarMode + + " vectorExpression " + vectorExpression.toString()); + */ + + batchSource.resetBatchIteration(); + int rowIndex = 0; + while (true) { + if (!batchSource.fillNextBatch(batch)) { + break; + } + vectorExpression.evaluate(batch); + extractResultObjects(batch, rowIndex, resultVectorExtractRow, scrqtchRow, + objectInspector, resultObjects); + rowIndex += batch.size; + } + + return true; + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorNull.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorNull.java index a4fc0d57..9b0a2ae 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorNull.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorNull.java 
@@ -49,8 +49,6 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNot; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull; @@ -74,8 +72,6 @@ import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.WritableComparator; -import org.apache.hadoop.io.WritableComparable; import junit.framework.Assert; @@ -407,7 +403,7 @@ private boolean doVectorCastTest(TypeInfo typeInfo, boolean isFilter, " vectorExpression " + vectorExpression.toString()); } - System.out.println("*VECTOR EXPRESSION* " + vectorExpression.getClass().getSimpleName()); + // System.out.println("*VECTOR EXPRESSION* " + vectorExpression.getClass().getSimpleName()); /* System.out.println( diff --git ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out index b278ecc..f5d919d 100644 --- ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out +++ ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out @@ -88,11 +88,11 @@ POSTHOOK: Lineage: parquet_map_type.stringmap SIMPLE [(parquet_map_type_staging) PREHOOK: query: select count(*) from parquet_map_type PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from parquet_map_type POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1023 PREHOOK: query: explain vectorization expression select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], doubleMap[123.123], stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10 @@ -127,7 +127,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [1, 2, 3, 8, 9, 10, 11, 12, 13] - selectExpressions: VectorUDFMapIndexStringScalar(col 1:map, key: k2) -> 8:string, VectorUDFMapIndexLongScalar(col 2:map, key: 456) -> 9:int, VectorUDFMapIndexDoubleScalar(col 3:map, key: 123.123) -> 10:double, VectorUDFMapIndexStringCol(col 1:map, key: col 4:string) -> 11:string, VectorUDFMapIndexLongCol(col 2:map, key: col 5:int) -> 12:int, VectorUDFMapIndexDoubleCol(col 3:map, key: col 6:double) -> 13:double + selectExpressions: VectorUDFMapIndexStringScalar(col 1:map, key: k2) -> 8:string, VectorUDFMapIndexLongScalar(col 2:map, key: 456) -> 9:int, VectorUDFMapIndexDecimalScalar(col 3:map, key: 123.123) -> 10:double, VectorUDFMapIndexStringCol(col 1:map, key: col 4:string) -> 11:string, VectorUDFMapIndexLongCol(col 2:map, key: col 5:int) -> 12:int, VectorUDFMapIndexDoubleCol(col 3:map, key: col 6:double) -> 13:double Statistics: Num rows: 1023 Data size: 2183412 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 @@ -167,12 +167,12 @@ PREHOOK: query: select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456 stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: 
default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], doubleMap[123.123], stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### {"k1":"v1","k2":"v1-2"} {123:1,456:2} {123.123:1.1,456.456:1.2} v1-2 2 1.1 v1 1 1.2 {"k1":"v2","k2":"v2-2"} {123:3,456:4} {123.123:2.1,456.456:2.2} v2-2 4 2.1 v2 3 2.2 {"k1":"v3","k2":"v3-2"} {123:5,456:6} {123.123:3.1,456.456:3.2} v3-2 6 3.1 v3 5 3.2 @@ -228,7 +228,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [8, 9, 10] - selectExpressions: VectorUDFMapIndexStringScalar(col 1:map, key: k1) -> 8:string, VectorUDFMapIndexLongScalar(col 2:map, key: 123) -> 9:int, VectorUDFMapIndexDoubleScalar(col 3:map, key: 123.123) -> 10:double + selectExpressions: VectorUDFMapIndexStringScalar(col 1:map, key: k1) -> 8:string, VectorUDFMapIndexLongScalar(col 2:map, key: 123) -> 9:int, VectorUDFMapIndexDecimalScalar(col 3:map, key: 123.123) -> 10:double Statistics: Num rows: 511 Data size: 995378 Basic stats: COMPLETE Column stats: NONE Top N Key Operator sort order: + @@ -360,12 +360,12 @@ PREHOOK: query: select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1' from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1'] from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 199 100.1 v100 1999 1000.1 v1000 2001 1001.1 v1001 @@ -396,22 +396,22 @@ POSTHOOK: Lineage: parquet_map_type.stringmap SIMPLE [(parquet_map_type_staging) PREHOOK: query: select count(*) from parquet_map_type PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from parquet_map_type POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1024 PREHOOK: query: select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], doubleMap[123.123], stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], doubleMap[123.123], stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### {"k1":"v1","k2":"v1-2"} {123:1,456:2} {123.123:1.1,456.456:1.2} v1-2 2 1.1 v1 1 1.2 {"k1":"v2","k2":"v2-2"} {123:3,456:4} {123.123:2.1,456.456:2.2} v2-2 4 2.1 v2 3 2.2 {"k1":"v3","k2":"v3-2"} {123:5,456:6} {123.123:3.1,456.456:3.2} v3-2 6 3.1 v3 5 
3.2 @@ -426,12 +426,12 @@ PREHOOK: query: select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1' from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1'] from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 199 100.1 v100 1999 1000.1 v1000 2001 1001.1 v1001 @@ -462,22 +462,22 @@ POSTHOOK: Lineage: parquet_map_type.stringmap SIMPLE [(parquet_map_type_staging) PREHOOK: query: select count(*) from parquet_map_type PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from parquet_map_type POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1025 PREHOOK: query: select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], doubleMap[123.123], stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], doubleMap[123.123], stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### {"k1":"v1","k2":"v1-2"} {123:1,456:2} {123.123:1.1,456.456:1.2} v1-2 2 1.1 v1 1 1.2 {"k1":"v2","k2":"v2-2"} {123:3,456:4} {123.123:2.1,456.456:2.2} v2-2 4 2.1 v2 3 2.2 {"k1":"v3","k2":"v3-2"} {123:5,456:6} {123.123:3.1,456.456:3.2} v3-2 6 3.1 v3 5 3.2 @@ -492,12 +492,12 @@ PREHOOK: query: select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1' from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1'] from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 199 100.1 v100 1999 1000.1 v1000 2001 1001.1 v1001 diff --git ql/src/test/results/clientpositive/parquet_map_type_vectorization.q.out ql/src/test/results/clientpositive/parquet_map_type_vectorization.q.out index 49cf3a2..b4c75d4 100644 --- ql/src/test/results/clientpositive/parquet_map_type_vectorization.q.out +++ ql/src/test/results/clientpositive/parquet_map_type_vectorization.q.out @@ -124,7 +124,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [1, 2, 3, 8, 9, 10, 11, 12, 13] - selectExpressions: VectorUDFMapIndexStringScalar(col 1:map, key: k2) -> 8:string, VectorUDFMapIndexLongScalar(col 2:map, 
key: 456) -> 9:int, VectorUDFMapIndexDoubleScalar(col 3:map, key: 123.123) -> 10:double, VectorUDFMapIndexStringCol(col 1:map, key: col 4:string) -> 11:string, VectorUDFMapIndexLongCol(col 2:map, key: col 5:int) -> 12:int, VectorUDFMapIndexDoubleCol(col 3:map, key: col 6:double) -> 13:double + selectExpressions: VectorUDFMapIndexStringScalar(col 1:map, key: k2) -> 8:string, VectorUDFMapIndexLongScalar(col 2:map, key: 456) -> 9:int, VectorUDFMapIndexDecimalScalar(col 3:map, key: 123.123) -> 10:double, VectorUDFMapIndexStringCol(col 1:map, key: col 4:string) -> 11:string, VectorUDFMapIndexLongCol(col 2:map, key: col 5:int) -> 12:int, VectorUDFMapIndexDoubleCol(col 3:map, key: col 6:double) -> 13:double Statistics: Num rows: 1023 Data size: 7161 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 @@ -218,7 +218,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [8, 9, 10] - selectExpressions: VectorUDFMapIndexStringScalar(col 1:map, key: k1) -> 8:string, VectorUDFMapIndexLongScalar(col 2:map, key: 123) -> 9:int, VectorUDFMapIndexDoubleScalar(col 3:map, key: 123.123) -> 10:double + selectExpressions: VectorUDFMapIndexStringScalar(col 1:map, key: k1) -> 8:string, VectorUDFMapIndexLongScalar(col 2:map, key: 123) -> 9:int, VectorUDFMapIndexDecimalScalar(col 3:map, key: 123.123) -> 10:double Statistics: Num rows: 511 Data size: 3577 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col1), sum(_col2)
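
The lookup contract shared by the findInMap / findScalarInMap overrides in this patch can be summarized with a small, self-contained sketch (illustrative plain Java under assumed names, not the Hive classes themselves): a batch of maps is stored as flattened key/value child arrays plus per-row offsets and lengths, each map row occupies the slice [offsets[row], offsets[row] + lengths[row]), the key is located by a linear scan, and the result is the absolute element index into the child vectors, or -1 when the key is absent (which the caller turns into a NULL output row).

    // Minimal sketch of the per-row map lookup pattern used by the new expressions.
    // Array names mirror the MapColumnVector layout; long keys and values keep it simple.
    public class MapIndexLookupSketch {

      /** Returns the absolute element index of 'key' in map row 'mapBatchIndex', or -1. */
      static int findScalarInMap(long[] keys, long[] offsets, long[] lengths,
          int mapBatchIndex, long key) {
        final int offset = (int) offsets[mapBatchIndex];
        final int count = (int) lengths[mapBatchIndex];
        for (int i = 0; i < count; i++) {
          if (keys[offset + i] == key) {
            return offset + i;   // caller reads the matching value at this index
          }
        }
        return -1;               // caller marks the output row as NULL
      }

      public static void main(String[] args) {
        // Two map rows: {1:10, 2:20} and {3:30}
        long[] keys    = {1, 2, 3};
        long[] values  = {10, 20, 30};
        long[] offsets = {0, 2};
        long[] lengths = {2, 1};

        int idx = findScalarInMap(keys, offsets, lengths, 0, 2);
        System.out.println(idx >= 0 ? Long.toString(values[idx]) : "NULL");   // prints 20
        idx = findScalarInMap(keys, offsets, lengths, 1, 2);
        System.out.println(idx >= 0 ? Long.toString(values[idx]) : "NULL");   // prints NULL
      }
    }

The string variants shown in the patch follow the same scan but compare byte ranges with StringExpr.equal, and the decimal variants registered in GenericUDFIndex presumably compare HiveDecimalWritable keys the same way; the -1 return value is what drives the NULL handling exercised by TestVectorIndex.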