diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index dd59bf2..e0f249d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -478,7 +478,7 @@ public VectorExpression getVectorExpression(ExprNodeDesc exprDesc, Mode mode) th mode); } if (ve == null) { - throw new HiveException("Could not vectorize expression: "+exprDesc.getName()); + throw new HiveException("Could not vectorize expression: "+exprDesc.toString()); } if (LOG.isDebugEnabled()) { LOG.debug("Input Expression = " + exprDesc.getTypeInfo() @@ -993,7 +993,7 @@ private VectorExpression getVectorExpressionForUdf(GenericUDF genericeUdf, int numChildren = (childExpr == null) ? 0 : childExpr.size(); - if (numChildren > 2 && genericeUdf != null && mode == Mode.FILTER && + if (numChildren > 2 && genericeUdf != null && ((genericeUdf instanceof GenericUDFOPOr) || (genericeUdf instanceof GenericUDFOPAnd))) { // Special case handling for Multi-OR and Multi-AND. 
@@ -1015,14 +1015,26 @@ private VectorExpression getVectorExpressionForUdf(GenericUDF genericeUdf, } Class vclass; if (genericeUdf instanceof GenericUDFOPOr) { - vclass = FilterExprOrExpr.class; + if (mode == Mode.PROJECTION) { + vclass = ColMultiOrCol.class; + } else { + vclass = FilterExprOrExpr.class; + } } else if (genericeUdf instanceof GenericUDFOPAnd) { - vclass = FilterExprAndExpr.class; + if (mode == Mode.PROJECTION) { + vclass = ColMultiAndCol.class; + } else { + vclass = FilterExprAndExpr.class; + } } else { throw new RuntimeException("Unexpected multi-child UDF"); } Mode childrenMode = getChildrenMode(mode, udfClass); - return createVectorExpression(vclass, childExpr, childrenMode, returnType); + if (mode == Mode.PROJECTION) { + return createVectorMultiAndOrProjectionExpr(vclass, childExpr, childrenMode, returnType); + } else { + return createVectorExpression(vclass, childExpr, childrenMode, returnType); + } } if (numChildren > VectorExpressionDescriptor.MAX_NUM_ARGUMENTS) { return null; @@ -1061,6 +1073,37 @@ private VectorExpression getVectorExpressionForUdf(GenericUDF genericeUdf, return createVectorExpression(vclass, childExpr, childrenMode, returnType); } + private void determineChildrenVectorExprAndArguments(Class vectorClass, + List childExpr, int numChildren, Mode childrenMode, + VectorExpression.Type [] inputTypes, List children, Object[] arguments) + throws HiveException { + for (int i = 0; i < numChildren; i++) { + ExprNodeDesc child = childExpr.get(i); + String undecoratedName = getUndecoratedName(child.getTypeInfo().getTypeName()); + inputTypes[i] = VectorExpression.Type.getValue(undecoratedName); + if (inputTypes[i] == VectorExpression.Type.OTHER){ + throw new HiveException("No vector type for " + vectorClass.getSimpleName() + " argument #" + i + " type name " + undecoratedName); + } + if (child instanceof ExprNodeGenericFuncDesc) { + VectorExpression vChild = getVectorExpression(child, childrenMode); + children.add(vChild); + arguments[i] 
= vChild.getOutputColumn(); + } else if (child instanceof ExprNodeColumnDesc) { + int colIndex = getInputColumnIndex((ExprNodeColumnDesc) child); + if (childrenMode == Mode.FILTER) { + // In filter mode, the column must be a boolean + children.add(new SelectColumnIsTrue(colIndex)); + } + arguments[i] = colIndex; + } else if (child instanceof ExprNodeConstantDesc) { + Object scalarValue = getVectorTypeScalarValue((ExprNodeConstantDesc) child); + arguments[i] = (null == scalarValue) ? getConstantVectorExpression(null, child.getTypeInfo(), childrenMode) : scalarValue; + } else { + throw new HiveException("Cannot handle expression type: " + child.getClass().getSimpleName()); + } + } + } + private VectorExpression createVectorExpression(Class vectorClass, List childExpr, Mode childrenMode, TypeInfo returnType) throws HiveException { int numChildren = childExpr == null ? 0: childExpr.size(); @@ -1068,31 +1111,41 @@ private VectorExpression createVectorExpression(Class vectorClass, List children = new ArrayList(); Object[] arguments = new Object[numChildren]; try { + determineChildrenVectorExprAndArguments(vectorClass, childExpr, numChildren, childrenMode, + inputTypes, children, arguments); + VectorExpression vectorExpression = instantiateExpression(vectorClass, returnType, arguments); + vectorExpression.setInputTypes(inputTypes); + if ((vectorExpression != null) && !children.isEmpty()) { + vectorExpression.setChildExpressions(children.toArray(new VectorExpression[0])); + } + return vectorExpression; + } catch (Exception ex) { + throw new HiveException(ex); + } finally { + for (VectorExpression ve : children) { + ocm.freeOutputColumn(ve.getOutputColumn()); + } + } + } + + private VectorExpression createVectorMultiAndOrProjectionExpr(Class vectorClass, + List childExpr, Mode childrenMode, TypeInfo returnType) throws HiveException { + int numChildren = childExpr == null ? 
0: childExpr.size(); + VectorExpression.Type [] inputTypes = new VectorExpression.Type[numChildren]; + List children = new ArrayList(); + Object[] arguments = new Object[numChildren]; + try { + determineChildrenVectorExprAndArguments(vectorClass, childExpr, numChildren, childrenMode, + inputTypes, children, arguments); + + // For Multi-AND/OR, transform the arguments -- column indices into an array of int. + int[] colNums = new int[numChildren]; for (int i = 0; i < numChildren; i++) { - ExprNodeDesc child = childExpr.get(i); - String undecoratedName = getUndecoratedName(child.getTypeInfo().getTypeName()); - inputTypes[i] = VectorExpression.Type.getValue(undecoratedName); - if (inputTypes[i] == VectorExpression.Type.OTHER){ - throw new HiveException("No vector type for " + vectorClass.getSimpleName() + " argument #" + i + " type name " + undecoratedName); - } - if (child instanceof ExprNodeGenericFuncDesc) { - VectorExpression vChild = getVectorExpression(child, childrenMode); - children.add(vChild); - arguments[i] = vChild.getOutputColumn(); - } else if (child instanceof ExprNodeColumnDesc) { - int colIndex = getInputColumnIndex((ExprNodeColumnDesc) child); - if (childrenMode == Mode.FILTER) { - // In filter mode, the column must be a boolean - children.add(new SelectColumnIsTrue(colIndex)); - } - arguments[i] = colIndex; - } else if (child instanceof ExprNodeConstantDesc) { - Object scalarValue = getVectorTypeScalarValue((ExprNodeConstantDesc) child); - arguments[i] = (null == scalarValue) ? 
getConstantVectorExpression(null, child.getTypeInfo(), childrenMode) : scalarValue; - } else { - throw new HiveException("Cannot handle expression type: " + child.getClass().getSimpleName()); - } + colNums[i] = (Integer) arguments[i]; } + arguments = new Object[1]; + arguments[0] = colNums; + VectorExpression vectorExpression = instantiateExpression(vectorClass, returnType, arguments); vectorExpression.setInputTypes(inputTypes); if ((vectorExpression != null) && !children.isEmpty()) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColMultiAndCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColMultiAndCol.java
new file mode 100644
index 0000000..cc2a58b
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColMultiAndCol.java
@@ -0,0 +1,238 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import java.util.Arrays;
+
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Evaluate AND of 3 or more boolean columns and store result in the output boolean column.
+ *
+ * Inputs are read as LongColumnVectors holding 0 for false and non-zero for true. Each row
+ * follows SQL three-valued logic:
+ *
+ *   Any false                          --> false
+ *   No false with 1 or more null       --> null
+ *   All true                           --> true
+ */
+public class ColMultiAndCol extends VectorExpression {
+
+  private static final long serialVersionUID = 1L;
+
+  private int[] colNums;
+  private int outputColumn;
+
+  // Scratch: indices of the rows that have not met a false input so far.
+  private int[] andSelected;
+
+  // Scratch: for each row index, whether a null input has been seen for that row.
+  private boolean[] intermediateNulls;
+
+  public ColMultiAndCol(int[] colNums, int outputColumn) {
+    this();
+    this.colNums = colNums;
+    this.outputColumn = outputColumn;
+    andSelected = new int[VectorizedRowBatch.DEFAULT_SIZE];
+    intermediateNulls = new boolean[VectorizedRowBatch.DEFAULT_SIZE];
+  }
+
+  public ColMultiAndCol() {
+    super();
+  }
+
+  /**
+   * Evaluates the child expressions, then ANDs the columns listed in colNums for every row
+   * of the batch and stores the result in the output column.
+   */
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    Preconditions.checkState(colNums.length > 2);
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn];
+    long[] outputVector = outV.vector;
+    int n = batch.size;
+    if (n <= 0) {
+      // Nothing to do
+      return;
+    }
+
+    outV.reset();
+
+    // Row indices address outputVector, so its length bounds every index used below.
+    ensureScratchCapacity(outputVector.length);
+    Arrays.fill(intermediateNulls, false);
+
+    // Every row of the batch starts out as a candidate for a true or null result.
+    int andSel = 0;
+    if (batch.selectedInUse) {
+      int[] sel = batch.selected;
+      for (int j = 0; j != n; j++) {
+        andSelected[andSel++] = sel[j];
+      }
+    } else {
+      for (int i = 0; i != n; i++) {
+        andSelected[andSel++] = i;
+      }
+    }
+
+    // Stays true only while every input processed so far is repeating. No row can then have
+    // been treated differently from another, so the output can be repeating as well.
+    boolean allRepeating = true;
+
+    for (int colNum = 0; colNum < colNums.length && andSel > 0; colNum++) {
+      LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNums[colNum]];
+      if (inputColVector.isRepeating) {
+        andSel = applyRepeatingInput(inputColVector, andSel);
+      } else {
+        allRepeating = false;
+        andSel = applyInput(inputColVector, andSel);
+      }
+    }
+
+    // Produce final result.
+
+    if (andSel == 0) {
+      // Every row met a false input.
+      outV.isRepeating = true;
+      outputVector[0] = 0;
+    } else if (allRepeating) {
+      // All rows are still candidates and share one state: null if any input was null,
+      // otherwise true.
+      outV.isRepeating = true;
+      if (intermediateNulls[andSelected[0]]) {
+        outV.noNulls = false;
+        outV.isNull[0] = true;
+      } else {
+        outputVector[0] = 1;
+      }
+    } else {
+      // Rows that dropped out are false; the remaining candidates are null or true.
+      Arrays.fill(outputVector, 0);
+      for (int j = 0; j < andSel; j++) {
+        int i = andSelected[j];
+        if (intermediateNulls[i]) {
+          outV.noNulls = false;
+          outV.isNull[i] = true;
+        } else {
+          outputVector[i] = 1;
+        }
+      }
+    }
+  }
+
+  /**
+   * Applies a repeating input, whose entry 0 stands for every row.
+   *
+   * @return the number of candidate rows left in andSelected
+   */
+  private int applyRepeatingInput(LongColumnVector inputColVector, int andSel) {
+    if (!inputColVector.noNulls && inputColVector.isNull[0]) {
+      // Null for every row: the candidates stay but can no longer become true.
+      for (int j = 0; j < andSel; j++) {
+        intermediateNulls[andSelected[j]] = true;
+      }
+      return andSel;
+    }
+    // A non-null false decides every row; a true changes nothing.
+    return (inputColVector.vector[0] == 0) ? 0 : andSel;
+  }
+
+  /**
+   * Applies a non-repeating input: drops the candidates whose value is false and notes the
+   * candidates whose value is null.
+   *
+   * @return the number of candidate rows left in andSelected
+   */
+  private int applyInput(LongColumnVector inputColVector, int andSel) {
+    long[] vector = inputColVector.vector;
+    int newSel = 0;
+    if (inputColVector.noNulls) {
+      for (int j = 0; j < andSel; j++) {
+        int i = andSelected[j];
+        if (vector[i] != 0) {
+          andSelected[newSel++] = i;
+        }
+      }
+    } else {
+      boolean[] isNull = inputColVector.isNull;
+      for (int j = 0; j < andSel; j++) {
+        int i = andSelected[j];
+        if (isNull[i]) {
+          // Keep the row: null AND true is null, and a later false can still decide it.
+          intermediateNulls[i] = true;
+          andSelected[newSel++] = i;
+        } else if (vector[i] != 0) {
+          andSelected[newSel++] = i;
+        }
+      }
+    }
+    return newSel;
+  }
+
+  /**
+   * Allocates or grows the scratch arrays; the no-argument constructor leaves them null.
+   */
+  private void ensureScratchCapacity(int capacity) {
+    if (andSelected == null || andSelected.length < capacity) {
+      andSelected = new int[capacity];
+    }
+    if (intermediateNulls == null || intermediateNulls.length < capacity) {
+      intermediateNulls = new boolean[capacity];
+    }
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return outputColumn;
+  }
+
+  @Override
+  public String getOutputType() {
+    return "boolean";
+  }
+
+  public void setOutputColumn(int outputColumn) {
+    this.outputColumn = outputColumn;
+  }
+
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    // NOTE(review): declares 2 arguments although evaluate() requires 3 or more columns;
+    // presumably VectorizationContext instantiates this class directly for Multi-AND -- confirm.
+    return (new VectorExpressionDescriptor.Builder())
+        .setMode(
+            VectorExpressionDescriptor.Mode.PROJECTION)
+        .setNumArguments(2)
+        .setArgumentTypes(
+            VectorExpressionDescriptor.ArgumentType.getType("long"),
+            VectorExpressionDescriptor.ArgumentType.getType("long"))
+        .setInputExpressionTypes(
+            VectorExpressionDescriptor.InputExpressionType.COLUMN,
+            VectorExpressionDescriptor.InputExpressionType.COLUMN).build();
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColMultiOrCol.java
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColMultiOrCol.java new file mode 100644 index 0000000..4b0363e --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColMultiOrCol.java @@ -0,0 +1,923 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.util.Arrays; + +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +import com.google.common.base.Preconditions; + +/** + * This class performs an OR expression on three or more input columns and stores + * the boolean output in a separate output column. The boolean values + * are expected to be represented as 0/1 in a long vector. 
+ */
+public class ColMultiOrCol extends VectorExpression {
+
+  private static final long serialVersionUID = 1L;
+
+  // Batch column numbers of the boolean inputs that are OR'ed together.
+  private int[] colNums;
+
+  // Batch column number that receives the result.
+  private int outputColumn;
+
+  // Scratch: row indexes that are not (yet) known to be true, i.e. false and/or null so far.
+  private int[] orSelected;
+
+  // Scratch, indexed by row: true when at least one null input was seen for that row.
+  private boolean[] intermediateNulls;
+
+  /**
+   * @param colNums batch column numbers of the inputs; evaluate() requires more than two
+   * @param outputColumn batch column number of the result column
+   */
+  public ColMultiOrCol(int[] colNums, int outputColumn) {
+    this();
+    this.colNums = colNums;
+    this.outputColumn = outputColumn;
+    orSelected = new int[VectorizedRowBatch.DEFAULT_SIZE];
+    intermediateNulls = new boolean[VectorizedRowBatch.DEFAULT_SIZE];
+  }
+
+  /**
+   * No-argument constructor. It leaves the scratch arrays unallocated; evaluate() allocates
+   * them on first use.
+   */
+  public ColMultiOrCol() {
+    super();
+  }
+
+  /**
+   * Computes the OR of all input columns into the output column, row by row, using
+   * three-valued logic:
+   *
+   *   Any true                                --> true
+   *   0 or more false with 1 or more null     --> null
+   *   All false                               --> false
+   *
+   * True is stored as 1 and false as 0 in the output vector; a null result is reported through
+   * outV.isNull / outV.noNulls. Input values are tested with "== 1", so inputs are expected to
+   * be encoded as 0/1.
+   *
+   * The output is marked isRepeating when a repeating, non-null true input decides the whole
+   * batch, or when every input is repeating. Otherwise one entry is written per selected row.
+   */
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    Preconditions.checkState(colNums.length > 2);
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    final int n = batch.size;
+    if (n <= 0) {
+      // Nothing to do
+      return;
+    }
+
+    LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn];
+    long[] outputVector = outV.vector;
+    outV.reset();
+
+    // Pass 1: fold every repeating input into one batch-wide state.
+    //  - a repeating non-null true decides the whole batch immediately;
+    //  - a repeating null makes every row that never sees a true come out as null;
+    //  - a repeating false contributes nothing.
+    // isNull[0] is only consulted when noNulls is false.
+    boolean allRepeating = true;
+    boolean repeatingHasNulls = false;
+    for (int colNum : colNums) {
+      LongColumnVector colVector = (LongColumnVector) batch.cols[colNum];
+      if (!colVector.isRepeating) {
+        allRepeating = false;
+      } else if (!colVector.noNulls && colVector.isNull[0]) {
+        repeatingHasNulls = true;
+      } else if (colVector.vector[0] == 1) {
+        outV.isRepeating = true;
+        outputVector[0] = 1;
+        return;
+      }
+    }
+
+    if (allRepeating) {
+
+      // The whole batch is false and may have nulls.
+      outV.isRepeating = true;
+      if (repeatingHasNulls) {
+        outV.noNulls = false;
+        outV.isNull[0] = true;
+      } else {
+        outputVector[0] = 0;
+      }
+      return;
+    }
+
+    // Row indexes are valid indexes into outputVector, so its length bounds both scratch arrays.
+    ensureScratchCapacity(outputVector.length);
+
+    // Pass 2 set-up: every selected row starts as a "not yet true" candidate. The row is written
+    // as false explicitly rather than relying on outV.reset() to have cleared the value array,
+    // and it starts with a null already recorded when some repeating input was null.
+    int orSel;
+    if (batch.selectedInUse) {
+      int[] sel = batch.selected;
+      orSel = 0;
+      for (int j = 0; j != n; j++) {
+        int i = sel[j];
+        outputVector[i] = 0;
+        intermediateNulls[i] = repeatingHasNulls;
+        orSelected[orSel++] = i;
+      }
+    } else {
+      Arrays.fill(outputVector, 0, n, 0L);
+      Arrays.fill(intermediateNulls, 0, n, repeatingHasNulls);
+      for (int i = 0; i != n; i++) {
+        orSelected[i] = i;
+      }
+      orSel = n;
+    }
+
+    // Pass 2: apply the non-repeating inputs one column at a time. Rows that turn true are
+    // written to the output and dropped from orSelected; once true, a row never changes again.
+    // Stop early when no candidate is left.
+    for (int c = 0; c < colNums.length && orSel > 0; c++) {
+      LongColumnVector colVector = (LongColumnVector) batch.cols[colNums[c]];
+      if (colVector.isRepeating) {
+        // Repeating false or repeating null: already accounted for in pass 1.
+        continue;
+      }
+      long[] vector = colVector.vector;
+      int newSel = 0;
+      if (colVector.noNulls) {
+        for (int j = 0; j < orSel; j++) {
+          int i = orSelected[j];
+          if (vector[i] == 1) {
+            outputVector[i] = 1;
+          } else {
+            orSelected[newSel++] = i;
+          }
+        }
+      } else {
+        boolean[] isNull = colVector.isNull;
+        for (int j = 0; j < orSel; j++) {
+          int i = orSelected[j];
+          if (isNull[i]) {
+            // Keep the row as a candidate and remember that it has seen at least one null.
+            // The null flag is checked first so the value slot of a null entry is never read.
+            intermediateNulls[i] = true;
+            orSelected[newSel++] = i;
+          } else if (vector[i] == 1) {
+            outputVector[i] = 1;
+          } else {
+            // False: the row stays a candidate. It may or may not have nulls.
+            orSelected[newSel++] = i;
+          }
+        }
+      }
+      orSel = newSel;
+    }
+
+    // Produce final result. The remaining candidates never saw a true: those that saw a null
+    // become null, the others keep the false (0) written during set-up.
+    for (int j = 0; j < orSel; j++) {
+      int i = orSelected[j];
+      if (intermediateNulls[i]) {
+        outV.noNulls = false;
+        outV.isNull[i] = true;
+      }
+    }
+  }
+
+  /**
+   * Makes sure both scratch arrays exist and can be indexed by any row index below capacity.
+   * They are missing when the instance was created through the no-argument constructor.
+   */
+  private void ensureScratchCapacity(int capacity) {
+    if (orSelected == null || orSelected.length < capacity) {
+      orSelected = new int[capacity];
+    }
+    if (intermediateNulls == null || intermediateNulls.length < capacity) {
+      intermediateNulls = new boolean[capacity];
+    }
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return outputColumn;
+  }
+
+  @Override
+  public String getOutputType() {
+    return "boolean";
+  }
+
+  public void setOutputColumn(int outputColumn) {
+    this.outputColumn = outputColumn;
+  }
+
+  /**
+   * Describes this expression as a two-argument (long column, long column) projection.
+   *
+   * NOTE(review): evaluate() requires more than two input columns, so this fixed two-argument
+   * descriptor does not describe the real arity. Confirm it is never used for descriptor-based
+   * lookup of this class.
+   */
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    return (new VectorExpressionDescriptor.Builder())
+        .setMode(
+            VectorExpressionDescriptor.Mode.PROJECTION)
+        .setNumArguments(2)
+        .setArgumentTypes(
+            VectorExpressionDescriptor.ArgumentType.getType("long"),
+            VectorExpressionDescriptor.ArgumentType.getType("long"))
+        .setInputExpressionTypes(
+            VectorExpressionDescriptor.InputExpressionType.COLUMN,
+            VectorExpressionDescriptor.InputExpressionType.COLUMN).build();
+  }
+}
diff --git ql/src/test/queries/clientpositive/vector_multi_and.q ql/src/test/queries/clientpositive/vector_multi_and.q new file mode 100644 index 0000000..257a9ae --- /dev/null +++ ql/src/test/queries/clientpositive/vector_multi_and.q @@ -0,0 +1,47 @@ +set hive.cli.print.header=true; +set hive.explain.user=false; +SET hive.auto.convert.join=true; +set hive.fetch.task.conversion=none; +set hive.mapred.mode=nonstrict; + +-- SORT_QUERY_RESULTS + +create table vectortab_a_1k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/vectortab_a_1k' OVERWRITE INTO TABLE vectortab_a_1k; + +CREATE TABLE scratch AS SELECT t, si, i, b FROM vectortab_a_1k; +INSERT INTO TABLE scratch VALUES (NULL, NULL, NULL, NULL); + +CREATE TABLE vectortab_a_1k_orc STORED AS ORC AS SELECT * FROM scratch; + +SET
hive.vectorized.execution.enabled=true; + +EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 AND si > 0 AND i < 0) as multi_and_col from vectortab_a_1k_orc order by t) as q; + +SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 AND si > 0 AND i < 0) as multi_and_col from vectortab_a_1k_orc order by t) as q; + +EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, i, b, (t < 0 AND si > 0 AND i < 0 AND b > 0) as multi_and_col from vectortab_a_1k_orc order by t) as q; + +SELECT sum(hash(*)) FROM + (SELECT t, si, i, b, (t < 0 AND si > 0 AND i < 0 AND b > 0) as multi_and_col from vectortab_a_1k_orc order by t) as q; diff --git ql/src/test/queries/clientpositive/vector_multi_or.q ql/src/test/queries/clientpositive/vector_multi_or.q new file mode 100644 index 0000000..7ab259c --- /dev/null +++ ql/src/test/queries/clientpositive/vector_multi_or.q @@ -0,0 +1,47 @@ +set hive.cli.print.header=true; +set hive.explain.user=false; +SET hive.auto.convert.join=true; +set hive.fetch.task.conversion=none; +set hive.mapred.mode=nonstrict; + +-- SORT_QUERY_RESULTS + +create table vectortab_a_1k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/vectortab_a_1k' OVERWRITE INTO TABLE vectortab_a_1k; + +CREATE TABLE scratch AS SELECT t, si, i, b FROM vectortab_a_1k; +INSERT INTO TABLE scratch VALUES (NULL, NULL, NULL, NULL); + +CREATE TABLE vectortab_a_1k_orc STORED AS ORC AS SELECT * FROM scratch; + +SET hive.vectorized.execution.enabled=true; + +EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 OR si > 0 OR i < 0) as multi_or_col from vectortab_a_1k_orc order by t) as q; + +SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 OR si > 0 OR i < 0) as multi_or_col from vectortab_a_1k_orc order by t) as q; + +EXPLAIN +SELECT sum(hash(*)) 
FROM + (SELECT t, si, i, (t < 0 OR si > 0 OR i < 0 OR b > 0) as multi_or_col from vectortab_a_1k_orc order by t) as q; + +SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 OR si > 0 OR i < 0 OR b > 0) as multi_or_col from vectortab_a_1k_orc order by t) as q; diff --git ql/src/test/results/clientpositive/vector_multi_and.q.out ql/src/test/results/clientpositive/vector_multi_and.q.out new file mode 100644 index 0000000..9755679 --- /dev/null +++ ql/src/test/results/clientpositive/vector_multi_and.q.out @@ -0,0 +1,225 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +create table vectortab_a_1k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@vectortab_a_1k +POSTHOOK: query: -- SORT_QUERY_RESULTS + +create table vectortab_a_1k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vectortab_a_1k +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab_a_1k' OVERWRITE INTO TABLE vectortab_a_1k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@vectortab_a_1k +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab_a_1k' OVERWRITE INTO TABLE vectortab_a_1k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@vectortab_a_1k +PREHOOK: query: CREATE TABLE scratch AS SELECT t, si, i, b FROM vectortab_a_1k +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@vectortab_a_1k +PREHOOK: Output: database:default +PREHOOK: Output: 
default@scratch +POSTHOOK: query: CREATE TABLE scratch AS SELECT t, si, i, b FROM vectortab_a_1k +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@vectortab_a_1k +POSTHOOK: Output: database:default +POSTHOOK: Output: default@scratch +POSTHOOK: Lineage: scratch.b SIMPLE [(vectortab_a_1k)vectortab_a_1k.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: scratch.i SIMPLE [(vectortab_a_1k)vectortab_a_1k.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: scratch.si SIMPLE [(vectortab_a_1k)vectortab_a_1k.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: scratch.t SIMPLE [(vectortab_a_1k)vectortab_a_1k.FieldSchema(name:t, type:tinyint, comment:null), ] +t si i b +PREHOOK: query: INSERT INTO TABLE scratch VALUES (NULL, NULL, NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@scratch +POSTHOOK: query: INSERT INTO TABLE scratch VALUES (NULL, NULL, NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@scratch +POSTHOOK: Lineage: scratch.b EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: scratch.i EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: scratch.si EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: scratch.t EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: CREATE TABLE vectortab_a_1k_orc STORED AS ORC AS SELECT * FROM scratch +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@scratch +PREHOOK: Output: database:default +PREHOOK: Output: default@vectortab_a_1k_orc +POSTHOOK: query: CREATE TABLE vectortab_a_1k_orc 
STORED AS ORC AS SELECT * FROM scratch +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@scratch +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vectortab_a_1k_orc +POSTHOOK: Lineage: vectortab_a_1k_orc.b SIMPLE [(scratch)scratch.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: vectortab_a_1k_orc.i SIMPLE [(scratch)scratch.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: vectortab_a_1k_orc.si SIMPLE [(scratch)scratch.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: vectortab_a_1k_orc.t SIMPLE [(scratch)scratch.FieldSchema(name:t, type:tinyint, comment:null), ] +scratch.t scratch.si scratch.i scratch.b +PREHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 AND si > 0 AND i < 0) as multi_and_col from vectortab_a_1k_orc order by t) as q +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 AND si > 0 AND i < 0) as multi_and_col from vectortab_a_1k_orc order by t) as q +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vectortab_a_1k_orc + Statistics: Num rows: 1001 Data size: 19100 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t (type: tinyint), si (type: smallint), i (type: int), ((t < 0) and (si > 0) and (i < 0)) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1001 Data size: 19100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Statistics: Num rows: 1001 Data size: 19100 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: smallint), _col2 (type: int), _col3 (type: boolean) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: 
hash(KEY.reducesinkkey0,VALUE._col0,VALUE._col1,VALUE._col2) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1001 Data size: 19100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 AND si > 0 AND i < 0) as multi_and_col from vectortab_a_1k_orc order by t) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab_a_1k_orc +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 AND si > 0 AND i < 0) as multi_and_col from vectortab_a_1k_orc order by t) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab_a_1k_orc +#### A masked pattern was here #### +c0 +55381540934 +PREHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, i, b, (t < 0 AND si > 0 AND i < 0 AND b > 0) as multi_and_col from vectortab_a_1k_orc order by t) as q +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, i, b, (t < 0 AND si > 0 AND i < 0 AND b > 0) as multi_and_col from vectortab_a_1k_orc order by t) as q +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vectortab_a_1k_orc + Statistics: Num rows: 1001 Data size: 19100 Basic stats: COMPLETE Column stats: NONE + Select Operator + 
expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), ((t < 0) and (si > 0) and (i < 0) and (b > 0)) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1001 Data size: 19100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Statistics: Num rows: 1001 Data size: 19100 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: boolean) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: hash(KEY.reducesinkkey0,VALUE._col0,VALUE._col1,VALUE._col2,VALUE._col3) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1001 Data size: 19100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, i, b, (t < 0 AND si > 0 AND i < 0 AND b > 0) as multi_and_col from vectortab_a_1k_orc order by t) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab_a_1k_orc +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, i, b, (t < 0 AND si > 0 AND i < 0 AND b > 0) as multi_and_col from vectortab_a_1k_orc order by t) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab_a_1k_orc +#### A masked pattern was here #### +c0 +-46102196348 diff --git 
ql/src/test/results/clientpositive/vector_multi_or.q.out ql/src/test/results/clientpositive/vector_multi_or.q.out new file mode 100644 index 0000000..5d9ace9 --- /dev/null +++ ql/src/test/results/clientpositive/vector_multi_or.q.out @@ -0,0 +1,225 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +create table vectortab_a_1k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@vectortab_a_1k +POSTHOOK: query: -- SORT_QUERY_RESULTS + +create table vectortab_a_1k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vectortab_a_1k +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab_a_1k' OVERWRITE INTO TABLE vectortab_a_1k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@vectortab_a_1k +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab_a_1k' OVERWRITE INTO TABLE vectortab_a_1k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@vectortab_a_1k +PREHOOK: query: CREATE TABLE scratch AS SELECT t, si, i, b FROM vectortab_a_1k +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@vectortab_a_1k +PREHOOK: Output: database:default +PREHOOK: Output: default@scratch +POSTHOOK: query: CREATE TABLE scratch AS SELECT t, si, i, b FROM vectortab_a_1k +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@vectortab_a_1k +POSTHOOK: Output: database:default +POSTHOOK: Output: default@scratch +POSTHOOK: Lineage: 
scratch.b SIMPLE [(vectortab_a_1k)vectortab_a_1k.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: scratch.i SIMPLE [(vectortab_a_1k)vectortab_a_1k.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: scratch.si SIMPLE [(vectortab_a_1k)vectortab_a_1k.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: scratch.t SIMPLE [(vectortab_a_1k)vectortab_a_1k.FieldSchema(name:t, type:tinyint, comment:null), ] +t si i b +PREHOOK: query: INSERT INTO TABLE scratch VALUES (NULL, NULL, NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@scratch +POSTHOOK: query: INSERT INTO TABLE scratch VALUES (NULL, NULL, NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@scratch +POSTHOOK: Lineage: scratch.b EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: scratch.i EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: scratch.si EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: scratch.t EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: CREATE TABLE vectortab_a_1k_orc STORED AS ORC AS SELECT * FROM scratch +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@scratch +PREHOOK: Output: database:default +PREHOOK: Output: default@vectortab_a_1k_orc +POSTHOOK: query: CREATE TABLE vectortab_a_1k_orc STORED AS ORC AS SELECT * FROM scratch +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@scratch +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vectortab_a_1k_orc +POSTHOOK: Lineage: vectortab_a_1k_orc.b SIMPLE 
[(scratch)scratch.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: vectortab_a_1k_orc.i SIMPLE [(scratch)scratch.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: vectortab_a_1k_orc.si SIMPLE [(scratch)scratch.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: vectortab_a_1k_orc.t SIMPLE [(scratch)scratch.FieldSchema(name:t, type:tinyint, comment:null), ] +scratch.t scratch.si scratch.i scratch.b +PREHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 OR si > 0 OR i < 0) as multi_or_col from vectortab_a_1k_orc order by t) as q +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 OR si > 0 OR i < 0) as multi_or_col from vectortab_a_1k_orc order by t) as q +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vectortab_a_1k_orc + Statistics: Num rows: 1001 Data size: 19100 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t (type: tinyint), si (type: smallint), i (type: int), ((t < 0) or (si > 0) or (i < 0)) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1001 Data size: 19100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Statistics: Num rows: 1001 Data size: 19100 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: smallint), _col2 (type: int), _col3 (type: boolean) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: hash(KEY.reducesinkkey0,VALUE._col0,VALUE._col1,VALUE._col2) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1001 Data size: 19100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: complete + outputColumnNames: _col0 + 
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 OR si > 0 OR i < 0) as multi_or_col from vectortab_a_1k_orc order by t) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab_a_1k_orc +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 OR si > 0 OR i < 0) as multi_or_col from vectortab_a_1k_orc order by t) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab_a_1k_orc +#### A masked pattern was here #### +c0 +55381541673 +PREHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 OR si > 0 OR i < 0 OR b > 0) as multi_or_col from vectortab_a_1k_orc order by t) as q +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 OR si > 0 OR i < 0 OR b > 0) as multi_or_col from vectortab_a_1k_orc order by t) as q +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vectortab_a_1k_orc + Statistics: Num rows: 1001 Data size: 19100 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t (type: tinyint), si (type: smallint), i (type: int), ((t < 0) or (si > 0) or (i < 0) or (b > 0)) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1001 Data size: 19100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: 
_col0 (type: tinyint) + sort order: + + Statistics: Num rows: 1001 Data size: 19100 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: smallint), _col2 (type: int), _col3 (type: boolean) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: hash(KEY.reducesinkkey0,VALUE._col0,VALUE._col1,VALUE._col2) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1001 Data size: 19100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 OR si > 0 OR i < 0 OR b > 0) as multi_or_col from vectortab_a_1k_orc order by t) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab_a_1k_orc +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 OR si > 0 OR i < 0 OR b > 0) as multi_or_col from vectortab_a_1k_orc order by t) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab_a_1k_orc +#### A masked pattern was here #### +c0 +55381541786