diff --git itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizedLogicBench.java itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizedLogicBench.java index 50dadb2..7ff6158 100644 --- itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizedLogicBench.java +++ itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizedLogicBench.java @@ -55,7 +55,7 @@ public void setup() { rowBatch = buildRowBatch(new LongColumnVector(), 2, getBooleanLongColumnVector(), getBooleanLongColumnVector()); - expression = new ColAndCol(0, 1, 2); + expression = new ColAndCol(new int[] {0, 1}, 2); } } @@ -64,7 +64,7 @@ public void setup() { public void setup() { rowBatch = buildRowBatch(new LongColumnVector(), 2, getBooleanLongColumnVector(), getBooleanRepeatingLongColumnVector()); - expression = new ColAndCol(0, 1, 2); + expression = new ColAndCol(new int[] {0, 1}, 2); } } @@ -73,7 +73,7 @@ public void setup() { public void setup() { rowBatch = buildRowBatch(new LongColumnVector(), 2, getBooleanRepeatingLongColumnVector(), getBooleanLongColumnVector()); - expression = new ColAndCol(0, 1, 2); + expression = new ColAndCol(new int[] {0, 1}, 2); } } @@ -82,7 +82,7 @@ public void setup() { public void setup() { rowBatch = buildRowBatch(new LongColumnVector(), 2, getBooleanLongColumnVector(), getBooleanLongColumnVector()); - expression = new ColOrCol(0, 1, 2); + expression = new ColOrCol(new int[] {0, 1}, 2); } } @@ -91,7 +91,7 @@ public void setup() { public void setup() { rowBatch = buildRowBatch(new LongColumnVector(), 2, getBooleanLongColumnVector(), getBooleanRepeatingLongColumnVector()); - expression = new ColOrCol(0, 1, 2); + expression = new ColOrCol(new int[] {0, 1}, 2); } } @@ -100,7 +100,7 @@ public void setup() { public void setup() { rowBatch = buildRowBatch(new LongColumnVector(), 2, getBooleanRepeatingLongColumnVector(), getBooleanLongColumnVector()); - expression = new ColOrCol(0, 1, 
2); + expression = new ColOrCol(new int[] {0, 1}, 2); } } diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index f991d49..35e4ec4 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -294,7 +294,9 @@ minitez.query.files.shared=acid_globallimit.q,\ vector_leftsemi_mapjoin.q,\ vector_mapjoin_reduce.q,\ vector_mr_diff_schema_alias.q,\ + vector_multi_and_projection.q,\ vector_multi_insert.q,\ + vector_multi_or_projection.q,\ vector_non_string_partition.q,\ vector_nullsafe_join.q,\ vector_null_projection.q,\ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 3f95be2..3018ac7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -121,6 +121,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; @@ -479,7 +480,7 @@ public VectorExpression getVectorExpression(ExprNodeDesc exprDesc, Mode mode) th mode); } if (ve == null) { - throw new HiveException("Could not vectorize expression: "+exprDesc.getName()); + throw new HiveException("Could not vectorize expression: "+exprDesc.toString()); } if (LOG.isDebugEnabled()) { LOG.debug("Input Expression = " + exprDesc.getTypeInfo() @@ -996,36 +997,33 @@ private VectorExpression 
getVectorExpressionForUdf(GenericUDF genericeUdf, int numChildren = (childExpr == null) ? 0 : childExpr.size(); - if (numChildren > 2 && genericeUdf != null && mode == Mode.FILTER && + if (genericeUdf != null && ((genericeUdf instanceof GenericUDFOPOr) || (genericeUdf instanceof GenericUDFOPAnd))) { - // Special case handling for Multi-OR and Multi-AND. + // Special case handling for Multi-OR and Multi-AND FILTER and PROJECTION. - for (int i = 0; i < numChildren; i++) { - ExprNodeDesc child = childExpr.get(i); - String childTypeString = child.getTypeString(); - if (childTypeString == null) { - throw new HiveException("Null child type name string"); - } - TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(childTypeString); - Type columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo); - if (columnVectorType != ColumnVector.Type.LONG){ - return null; - } - if (!(child instanceof ExprNodeGenericFuncDesc) && !(child instanceof ExprNodeColumnDesc)) { - return null; - } - } Class vclass; if (genericeUdf instanceof GenericUDFOPOr) { - vclass = FilterExprOrExpr.class; + if (mode == Mode.PROJECTION) { + vclass = ColOrCol.class; + } else { + vclass = FilterExprOrExpr.class; + } } else if (genericeUdf instanceof GenericUDFOPAnd) { - vclass = FilterExprAndExpr.class; + if (mode == Mode.PROJECTION) { + vclass = ColAndCol.class; + } else { + vclass = FilterExprAndExpr.class; + } } else { throw new RuntimeException("Unexpected multi-child UDF"); } Mode childrenMode = getChildrenMode(mode, udfClass); - return createVectorExpression(vclass, childExpr, childrenMode, returnType); + if (mode == Mode.PROJECTION) { + return createVectorMultiAndOrProjectionExpr(vclass, childExpr, childrenMode, returnType); + } else { + return createVectorExpression(vclass, childExpr, childrenMode, returnType); + } } if (numChildren > VectorExpressionDescriptor.MAX_NUM_ARGUMENTS) { return null; @@ -1064,6 +1062,37 @@ private VectorExpression 
getVectorExpressionForUdf(GenericUDF genericeUdf, return createVectorExpression(vclass, childExpr, childrenMode, returnType); } + private void determineChildrenVectorExprAndArguments(Class vectorClass, + List childExpr, int numChildren, Mode childrenMode, + VectorExpression.Type [] inputTypes, List children, Object[] arguments) + throws HiveException { + for (int i = 0; i < numChildren; i++) { + ExprNodeDesc child = childExpr.get(i); + String undecoratedName = getUndecoratedName(child.getTypeInfo().getTypeName()); + inputTypes[i] = VectorExpression.Type.getValue(undecoratedName); + if (inputTypes[i] == VectorExpression.Type.OTHER){ + throw new HiveException("No vector type for " + vectorClass.getSimpleName() + " argument #" + i + " type name " + undecoratedName); + } + if (child instanceof ExprNodeGenericFuncDesc) { + VectorExpression vChild = getVectorExpression(child, childrenMode); + children.add(vChild); + arguments[i] = vChild.getOutputColumn(); + } else if (child instanceof ExprNodeColumnDesc) { + int colIndex = getInputColumnIndex((ExprNodeColumnDesc) child); + if (childrenMode == Mode.FILTER) { + // In filter mode, the column must be a boolean + children.add(new SelectColumnIsTrue(colIndex)); + } + arguments[i] = colIndex; + } else if (child instanceof ExprNodeConstantDesc) { + Object scalarValue = getVectorTypeScalarValue((ExprNodeConstantDesc) child); + arguments[i] = (null == scalarValue) ? getConstantVectorExpression(null, child.getTypeInfo(), childrenMode) : scalarValue; + } else { + throw new HiveException("Cannot handle expression type: " + child.getClass().getSimpleName()); + } + } + } + private VectorExpression createVectorExpression(Class vectorClass, List childExpr, Mode childrenMode, TypeInfo returnType) throws HiveException { int numChildren = childExpr == null ? 
0: childExpr.size(); @@ -1071,31 +1100,41 @@ private VectorExpression createVectorExpression(Class vectorClass, List children = new ArrayList(); Object[] arguments = new Object[numChildren]; try { + determineChildrenVectorExprAndArguments(vectorClass, childExpr, numChildren, childrenMode, + inputTypes, children, arguments); + VectorExpression vectorExpression = instantiateExpression(vectorClass, returnType, arguments); + vectorExpression.setInputTypes(inputTypes); + if ((vectorExpression != null) && !children.isEmpty()) { + vectorExpression.setChildExpressions(children.toArray(new VectorExpression[0])); + } + return vectorExpression; + } catch (Exception ex) { + throw new HiveException(ex); + } finally { + for (VectorExpression ve : children) { + ocm.freeOutputColumn(ve.getOutputColumn()); + } + } + } + + private VectorExpression createVectorMultiAndOrProjectionExpr(Class vectorClass, + List childExpr, Mode childrenMode, TypeInfo returnType) throws HiveException { + int numChildren = childExpr == null ? 0: childExpr.size(); + VectorExpression.Type [] inputTypes = new VectorExpression.Type[numChildren]; + List children = new ArrayList(); + Object[] arguments = new Object[numChildren]; + try { + determineChildrenVectorExprAndArguments(vectorClass, childExpr, numChildren, childrenMode, + inputTypes, children, arguments); + + // For Multi-AND/OR, transform the arguments -- column indices into an array of int. 
+ int[] colNums = new int[numChildren]; for (int i = 0; i < numChildren; i++) { - ExprNodeDesc child = childExpr.get(i); - String undecoratedName = getUndecoratedName(child.getTypeInfo().getTypeName()); - inputTypes[i] = VectorExpression.Type.getValue(undecoratedName); - if (inputTypes[i] == VectorExpression.Type.OTHER){ - throw new HiveException("No vector type for " + vectorClass.getSimpleName() + " argument #" + i + " type name " + undecoratedName); - } - if (child instanceof ExprNodeGenericFuncDesc) { - VectorExpression vChild = getVectorExpression(child, childrenMode); - children.add(vChild); - arguments[i] = vChild.getOutputColumn(); - } else if (child instanceof ExprNodeColumnDesc) { - int colIndex = getInputColumnIndex((ExprNodeColumnDesc) child); - if (childrenMode == Mode.FILTER) { - // In filter mode, the column must be a boolean - children.add(new SelectColumnIsTrue(colIndex)); - } - arguments[i] = colIndex; - } else if (child instanceof ExprNodeConstantDesc) { - Object scalarValue = getVectorTypeScalarValue((ExprNodeConstantDesc) child); - arguments[i] = (null == scalarValue) ? 
getConstantVectorExpression(null, child.getTypeInfo(), childrenMode) : scalarValue; - } else { - throw new HiveException("Cannot handle expression type: " + child.getClass().getSimpleName()); - } + colNums[i] = (Integer) arguments[i]; } + arguments = new Object[1]; + arguments[0] = colNums; + VectorExpression vectorExpression = instantiateExpression(vectorClass, returnType, arguments); vectorExpression.setInputTypes(inputTypes); if ((vectorExpression != null) && !children.isEmpty()) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java index ff7371d..6bcab5e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java @@ -17,26 +17,34 @@ */ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import com.google.common.base.Preconditions; + /** - * Evaluate AND of two boolean columns and store result in the output boolean column. + * Evaluate AND of 2 or more boolean columns and store result in the output boolean column.
*/ public class ColAndCol extends VectorExpression { private static final long serialVersionUID = 1L; - private int colNum1; - private int colNum2; + private int[] colNums; private int outputColumn; + private int[] mapToChildExpression; + private int[] andSelected; + private boolean[] intermediateNulls; - public ColAndCol(int colNum1, int colNum2, int outputColumn) { + public ColAndCol(int[] colNums, int outputColumn) { this(); - this.colNum1 = colNum1; - this.colNum2 = colNum2; + this.colNums = colNums; this.outputColumn = outputColumn; + mapToChildExpression = null; + andSelected = new int[VectorizedRowBatch.DEFAULT_SIZE]; + intermediateNulls = new boolean[VectorizedRowBatch.DEFAULT_SIZE]; } public ColAndCol() { @@ -46,243 +54,381 @@ public ColAndCol() { @Override public void evaluate(VectorizedRowBatch batch) { + Preconditions.checkState(colNums.length >= 2); + + if (childExpressions != null && mapToChildExpression == null) { + // Some vector child expressions can be omitted (e.g. if they are existing boolean columns). + mapToChildExpression = new int [colNums.length]; + int childIndex = 0; + for (int i = 0; i < childExpressions.length; i++) { + VectorExpression ve = childExpressions[i]; + int outputColumn = ve.getOutputColumn(); + while (outputColumn != colNums[childIndex]) { + mapToChildExpression[childIndex++] = -1; + } + mapToChildExpression[childIndex++] = i; + } + } + + final int n = batch.size; + if (n <= 0) { + // Nothing to do + return; + } + if (childExpressions != null) { - super.evaluateChildren(batch); + // Evaluate first child expression. Other child are conditionally evaluated later + // based on whether there is still the possibility of any true and/or null results and + // only evaluated on current true and/or null rows. 
+ int childExpressionIndex = mapToChildExpression[0]; + if (childExpressionIndex != -1) { + VectorExpression ve = childExpressions[childExpressionIndex]; + Preconditions.checkState(ve.getOutputColumn() == colNums[0]); + ve.evaluate(batch); + } } - LongColumnVector inputColVector1 = (LongColumnVector) batch.cols[colNum1]; - LongColumnVector inputColVector2 = (LongColumnVector) batch.cols[colNum2]; int[] sel = batch.selected; - int n = batch.size; - long[] vector1 = inputColVector1.vector; - long[] vector2 = inputColVector2.vector; LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn]; long[] outputVector = outV.vector; - if (n <= 0) { - // Nothing to do - return; - } - long vector1Value = vector1[0]; - long vector2Value = vector2[0]; - if (inputColVector1.noNulls && inputColVector2.noNulls) { - if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) { - // All must be selected otherwise size would be zero - // Repeating property will not change. - outV.isRepeating = true; - outputVector[0] = vector1[0] & vector2[0]; - } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) { - if (batch.selectedInUse) { - for (int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector1Value & vector2[i]; - } - } else { - for (int i = 0; i != n; i++) { - outputVector[i] = vector1Value & vector2[i]; - } + /** + * Null processing complicates the algorithm here for Multi-AND. + * + * All true --> true + * 0 or more true with 1 or more null --> result = null + * Any false --> false + * + * We remember nulls in the intermediateNulls array as we go along. + * Entries in andSelected are for rows that are either true and/or null. + * + * The flags andRepeating and andRepeatingIsNull track if the whole batch so far + * is true or null. 
+ */ + boolean andRepeating = false; + boolean andRepeatingIsNull = false; + int andSel = 0; + Arrays.fill(intermediateNulls, 0, VectorizedRowBatch.DEFAULT_SIZE, false); + + outV.reset(); + + LongColumnVector firstColVector = (LongColumnVector) batch.cols[colNums[0]]; + long[] firstVector = firstColVector.vector; + + if (firstColVector.isRepeating) { + if (firstColVector.noNulls || !firstColVector.isNull[0]) { + if (firstVector[0] == 0) { + outV.isRepeating = true; + outputVector[0] = 0; + return; } - outV.isRepeating = false; - } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) { - if (batch.selectedInUse) { - for (int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector1[i] & vector2Value; - } - } else { - for (int i = 0; i != n; i++) { - outputVector[i] = vector1[i] & vector2Value; + } else { + Preconditions.checkState(firstColVector.isNull[0]); + + // This stage null still could have influence even if andRepeating is also a null. + andRepeatingIsNull = true; + } + andRepeating = true; + } else if (firstColVector.noNulls) { + if (batch.selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + if (firstVector[i] == 1) { + andSelected[andSel++] = i; } } - outV.isRepeating = false; - } else /* neither side is repeating */{ - if (batch.selectedInUse) { - for (int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector1[i] & vector2[i]; - } - } else { - for (int i = 0; i != n; i++) { - outputVector[i] = vector1[i] & vector2[i]; + } else { + for (int i = 0; i != n; i++) { + if (firstVector[i] == 1) { + andSelected[andSel++] = i; } } - outV.isRepeating = false; } - outV.noNulls = true; - } else if (inputColVector1.noNulls && !inputColVector2.noNulls) { - // only input 2 side has nulls - if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) { - // All must be selected otherwise size would be zero - // Repeating property will not change. 
- outV.isRepeating = true; - outputVector[0] = vector1[0] & vector2[0]; - outV.isNull[0] = (vector1[0] == 1) && inputColVector2.isNull[0]; - } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) { - if (batch.selectedInUse) { - for (int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector1Value & vector2[i]; - outV.isNull[i] = (vector1[0] == 1) && inputColVector2.isNull[i]; - } - } else { - for (int i = 0; i != n; i++) { - outputVector[i] = vector1Value & vector2[i]; - outV.isNull[i] = (vector1[0] == 1) && inputColVector2.isNull[i]; + } else { + if (batch.selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + if (firstColVector.isNull[i]) { + intermediateNulls[i] = true; + andSelected[andSel++] = i; + } else if (firstVector[i] == 1) { + andSelected[andSel++] = i; } } - outV.isRepeating = false; - } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) { - if (batch.selectedInUse) { - for (int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector1[i] & vector2Value; - outV.isNull[i] = (vector1[i] == 1) && inputColVector2.isNull[0]; - } - } else { - for (int i = 0; i != n; i++) { - outputVector[i] = vector1[i] & vector2Value; - outV.isNull[i] = (vector1[i] == 1) && inputColVector2.isNull[0]; + } else { + for (int i = 0; i != n; i++) { + if (firstColVector.isNull[i]) { + intermediateNulls[i] = true; + andSelected[andSel++] = i; + } else if (firstVector[i] == 1) { + andSelected[andSel++] = i; } } - outV.isRepeating = false; - } else /* neither side is repeating */{ - if (batch.selectedInUse) { - for (int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector1[i] & vector2[i]; - outV.isNull[i] = (vector1[i] == 1) && inputColVector2.isNull[i]; - } - } else { - for (int i = 0; i != n; i++) { - outputVector[i] = vector1[i] & vector2[i]; - outV.isNull[i] = (vector1[i] == 1) && inputColVector2.isNull[i]; + } + } + + // Process input #2 and above. 
+ + int colNum = 1; + do { + if (!andRepeating && andSel == 0) { + + // Nothing is true (how philosophical!). + break; + } + + if (childExpressions != null) { + int childExpressionIndex = mapToChildExpression[colNum]; + if (childExpressionIndex != -1) { + if (andRepeating) { + // We need to start with a full evaluate. + VectorExpression ve = childExpressions[childExpressionIndex]; + Preconditions.checkState(ve.getOutputColumn() == colNums[colNum]); + ve.evaluate(batch); + } else { + // Evaluate next child expression. + // But only andSelected (current true or true with nulls rows). + boolean saveSelectedInUse = batch.selectedInUse; + int[] saveSelected = sel; + batch.selectedInUse = true; + batch.selected = andSelected; + + VectorExpression ve = childExpressions[childExpressionIndex]; + Preconditions.checkState(ve.getOutputColumn() == colNums[colNum]); + ve.evaluate(batch); + + batch.selectedInUse = saveSelectedInUse; + batch.selected = saveSelected; } } - outV.isRepeating = false; } - outV.noNulls = false; - } else if (!inputColVector1.noNulls && inputColVector2.noNulls) { - // only input 1 side has nulls - if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) { - // All must be selected otherwise size would be zero - // Repeating property will not change. 
- outV.isRepeating = true; - outputVector[0] = vector1[0] & vector2[0]; - outV.isNull[0] = inputColVector1.isNull[0] && (vector2[0] == 1); - } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) { - if (batch.selectedInUse) { - for (int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector1Value & vector2[i]; - outV.isNull[i] = inputColVector1.isNull[0] && (vector2[i] == 1); - } - } else { - for (int i = 0; i != n; i++) { - outputVector[i] = vector1Value & vector2[i]; - outV.isNull[i] = inputColVector1.isNull[0] && (vector2[i] == 1); + + LongColumnVector nextColVector = (LongColumnVector) batch.cols[colNums[colNum]]; + long[] nextVector = nextColVector.vector; + + if (andRepeating) { + if (nextColVector.isRepeating) { + if (nextColVector.noNulls || !nextColVector.isNull[0]) { + if (nextVector[0] == 0) { + outV.isRepeating = true; + outputVector[0] = 0; + return; + } else { + // andRepeatingIsNull goes on to the next stage, too. + } + } else { + Preconditions.checkState(nextColVector.isNull[0]); + + // This stage null still could have influence even if andRepeating is also a null. + andRepeatingIsNull = true; } - } - outV.isRepeating = false; - } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) { - if (batch.selectedInUse) { - for (int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector1[i] & vector2Value; - outV.isNull[i] = inputColVector1.isNull[i] && (vector2[0] == 1); + // Continue as andRepeating + } else if (nextColVector.noNulls) { + + // Switch away from andRepeating. + + Preconditions.checkState(andSel == 0); + andRepeating = false; + + if (andRepeatingIsNull) { + + // We only set intermediateNulls when the next row is true. + // The andSelected array will represent rows with true values and at least one null. + // If all future rounds are true, then the null will cause the result to be null. 
+ if (batch.selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + if (nextVector[i] == 1) { + intermediateNulls[i] = true; + andSelected[andSel++] = i; + } + } + } else { + for (int i = 0; i != n; i++) { + if (nextVector[i] == 1) { + intermediateNulls[i] = true; + andSelected[andSel++] = i; + } + } + } + andRepeatingIsNull = false; + } else { + + // Previous rounds were all true. + if (batch.selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + if (nextVector[i] == 1) { + andSelected[andSel++] = i; + } + } + } else { + for (int i = 0; i != n; i++) { + if (nextVector[i] == 1) { + andSelected[andSel++] = i; + } + } + } } } else { - for (int i = 0; i != n; i++) { - outputVector[i] = vector1[i] & vector2Value; - outV.isNull[i] = inputColVector1.isNull[i] && (vector2[0] == 1); + + // Can have nulls. + + // Switch away from andRepeating. + + Preconditions.checkState(andSel == 0); + andRepeating = false; + + if (andRepeatingIsNull) { + + // We only set intermediateNulls when the next row is true. + // The andSelected array will represent rows with true values and at least one null. + // If all future rounds are true, then the null will cause the result to be null. + if (batch.selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + if (nextColVector.isNull[i] || nextVector[i] == 1) { + intermediateNulls[i] = true; + andSelected[andSel++] = i; + } + } + } else { + for (int i = 0; i != n; i++) { + if (nextColVector.isNull[i] || nextVector[i] == 1) { + intermediateNulls[i] = true; + andSelected[andSel++] = i; + } + } + } + andRepeatingIsNull = false; + } else { + + // Previous rounds were all true. 
+ if (batch.selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + if (nextColVector.isNull[i]) { + intermediateNulls[i] = true; + andSelected[andSel++] = i; + } else if (nextVector[i] == 1) { + andSelected[andSel++] = i; + } + } + } else { + for (int i = 0; i != n; i++) { + if (nextColVector.isNull[i]) { + intermediateNulls[i] = true; + andSelected[andSel++] = i; + } else if (nextVector[i] == 1) { + andSelected[andSel++] = i; + } + } + } } } - outV.isRepeating = false; - } else /* neither side is repeating */{ - if (batch.selectedInUse) { - for (int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector1[i] & vector2[i]; - outV.isNull[i] = inputColVector1.isNull[i] && (vector2[i] == 1); - } - } else { - for (int i = 0; i != n; i++) { - outputVector[i] = vector1[i] & vector2[i]; - outV.isNull[i] = inputColVector1.isNull[i] && (vector2[i] == 1); + } else { + + // The andSelected array contains the rows that are currently true and possibly + // had some nulls. + + if (nextColVector.isRepeating) { + if (nextColVector.noNulls || !nextColVector.isNull[0]) { // repeating non-null value: noNulls == false alone does not mean entry 0 is null + if (nextVector[0] == 0) { + outV.isRepeating = true; + outputVector[0] = 0; + return; + } + } else if (nextColVector.isNull[0]) { + // Repeating null has influence over current andSelected entries. + for (int j = 0; j < andSel; j++) { + int i = andSelected[j]; + intermediateNulls[i] = true; + } } - } - outV.isRepeating = false; - } - outV.noNulls = false; - } else /* !inputColVector1.noNulls && !inputColVector2.noNulls */{ - // either input 1 or input 2 may have nulls - if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) { - // All must be selected otherwise size would be zero - // Repeating property will not change.
- outV.isRepeating = true; - outputVector[0] = vector1[0] & vector2[0]; - outV.isNull[0] = ((vector1[0] == 1) && inputColVector2.isNull[0]) - || (inputColVector1.isNull[0] && (vector2[0] == 1)) - || (inputColVector1.isNull[0] && inputColVector2.isNull[0]); - } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) { - if (batch.selectedInUse) { - for (int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector1Value & vector2[i]; - outV.isNull[i] = ((vector1[0] == 1) && inputColVector2.isNull[i]) - || (inputColVector1.isNull[0] && (vector2[i] == 1)) - || (inputColVector1.isNull[0] && inputColVector2.isNull[i]); + // All prior rows in andSelected remain because they are true. They may have nulls. + } else if (nextColVector.noNulls) { + + // New round eliminates false rows. + int newSel = 0; + for (int j = 0; j < andSel; j++) { + int i = andSelected[j]; + if (nextVector[i] == 1) { + andSelected[newSel++] = i; + } } + andSel = newSel; } else { - for (int i = 0; i != n; i++) { - outputVector[i] = vector1Value & vector2[i]; - outV.isNull[i] = ((vector1[0] == 1) && inputColVector2.isNull[i]) - || (inputColVector1.isNull[0] && (vector2[i] == 1)) - || (inputColVector1.isNull[0] && inputColVector2.isNull[i]); + + // Can have nulls. + + // New round eliminates false rows and notes nulls. + int newSel = 0; + for (int j = 0; j < andSel; j++) { + int i = andSelected[j]; + if (nextColVector.isNull[i]) { + // Keep the entry with true(s) and now has at least one null. + intermediateNulls[i] = true; + andSelected[newSel++] = i; + } else if (nextVector[i] == 1) { + // Continue on whether andSelected entry is true. It may have nulls.
+ andSelected[newSel++] = i; + } } + andSel = newSel; } - outV.isRepeating = false; - } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) { - if (batch.selectedInUse) { - for (int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector1[i] & vector2Value; - outV.isNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[0]) - || (inputColVector1.isNull[i] && (vector2[0] == 1)) - || (inputColVector1.isNull[i] && inputColVector2.isNull[0]); - } - } else { - for (int i = 0; i != n; i++) { - outputVector[i] = vector1[i] & vector2Value; - outV.isNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[0]) - || (inputColVector1.isNull[i] && (vector2[0] == 1)) - || (inputColVector1.isNull[i] && inputColVector2.isNull[0]); + } + } while (++colNum < colNums.length); + + // Produce final result. + + if (andRepeating) { + outV.isRepeating = true; + if (andRepeatingIsNull) { + outV.noNulls = false; + outV.isNull[0] = true; + } else { + outputVector[0] = 1; // every input was a repeating non-null TRUE (a repeating FALSE returns early above) + } + } else if (andSel == 0) { + outV.isRepeating = true; + outputVector[0] = 0; + } else { + // Ok, some rows were true and/or null throughout.
+ int andIndex = 0; + if (batch.selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + if (andIndex < andSel && andSelected[andIndex] == i) { + if (intermediateNulls[i]) { + outV.noNulls = false; + outV.isNull[i] = true; + } else { + outputVector[i] = 1; + } + andIndex++; + } else { + outputVector[i] = 0; } } - outV.isRepeating = false; - } else /* neither side is repeating */{ - if (batch.selectedInUse) { - for (int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector1[i] & vector2[i]; - outV.isNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[i]) - || (inputColVector1.isNull[i] && (vector2[i] == 1)) - || (inputColVector1.isNull[i] && inputColVector2.isNull[i]); - } - } else { - for (int i = 0; i != n; i++) { - outputVector[i] = vector1[i] & vector2[i]; - outV.isNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[i]) - || (inputColVector1.isNull[i] && (vector2[i] == 1)) - || (inputColVector1.isNull[i] && inputColVector2.isNull[i]); + Preconditions.checkState(andIndex == andSel); + } else { + Arrays.fill(outputVector, 0, n, 0); + for (int j = 0; j < andSel; j++) { + int i = andSelected[j]; + if (intermediateNulls[i]) { + outV.noNulls = false; + outV.isNull[i] = true; + } else { + outputVector[i] = 1; } } - outV.isRepeating = false; } - outV.noNulls = false; } } + static int fake = 0; + @Override public int getOutputColumn() { return outputColumn; @@ -293,22 +439,6 @@ public String getOutputType() { return "boolean"; } - public int getColNum1() { - return colNum1; - } - - public void setColNum1(int colNum1) { - this.colNum1 = colNum1; - } - - public int getColNum2() { - return colNum2; - } - - public void setColNum2(int colNum2) { - this.colNum2 = colNum2; - } - public void setOutputColumn(int outputColumn) { this.outputColumn = outputColumn; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColOrCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColOrCol.java index 60ed2d4..ea637f3 
100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColOrCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColOrCol.java @@ -18,10 +18,14 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import com.google.common.base.Preconditions; + /** * This class performs OR expression on two input columns and stores, * the boolean output in a separate output column. The boolean values @@ -31,15 +35,19 @@ private static final long serialVersionUID = 1L; - private int colNum1; - private int colNum2; + private int[] colNums; private int outputColumn; + private int[] mapToChildExpression; + private int[] orSelected; + private boolean[] intermediateNulls; - public ColOrCol(int colNum1, int colNum2, int outputColumn) { + public ColOrCol(int[] colNums, int outputColumn) { this(); - this.colNum1 = colNum1; - this.colNum2 = colNum2; + this.colNums = colNums; this.outputColumn = outputColumn; + mapToChildExpression = null; + orSelected = new int[VectorizedRowBatch.DEFAULT_SIZE]; + intermediateNulls = new boolean[VectorizedRowBatch.DEFAULT_SIZE]; } public ColOrCol() { @@ -49,240 +57,397 @@ public ColOrCol() { @Override public void evaluate(VectorizedRowBatch batch) { + Preconditions.checkState(colNums.length >= 2); + + if (childExpressions != null && mapToChildExpression == null) { + // Some vector child expressions can be omitted (e.g. if they are existing boolean columns). 
+ mapToChildExpression = new int [colNums.length]; + int childIndex = 0; + for (int i = 0; i < childExpressions.length; i++) { + VectorExpression ve = childExpressions[i]; + int outputColumn = ve.getOutputColumn(); + while (outputColumn != colNums[childIndex]) { + mapToChildExpression[childIndex++] = -1; + } + mapToChildExpression[childIndex++] = i; + } + } + + final int n = batch.size; + if (n <= 0) { + // Nothing to do + return; + } + if (childExpressions != null) { - super.evaluateChildren(batch); + // Evaluate first child expression. Other child are conditionally evaluated later + // based on whether there is still the possibility of any true and/or null results and + // only evaluated on current false and/or null rows. + int childExpressionIndex = mapToChildExpression[0]; + if (childExpressionIndex != -1) { + VectorExpression ve = childExpressions[childExpressionIndex]; + Preconditions.checkState(ve.getOutputColumn() == colNums[0]); + ve.evaluate(batch); + } } - LongColumnVector inputColVector1 = (LongColumnVector) batch.cols[colNum1]; - LongColumnVector inputColVector2 = (LongColumnVector) batch.cols[colNum2]; int[] sel = batch.selected; - int n = batch.size; - long[] vector1 = inputColVector1.vector; - long[] vector2 = inputColVector2.vector; LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn]; long[] outputVector = outV.vector; - if (n <= 0) { - // Nothing to do - return; + + /** + * Null processing complicates the algorithm here for Multi-OR. + * + * Any true --> true + * 0 or more false with 1 or more null --> result = null + * All false --> false + * + * We remember true as 1 in the outputVector because once set, it doesn't get unset. + * We remember nulls in the intermediateNulls so we can later ignore it if a row is true. + * Entries in orSelected are for rows that are either false and/or null. + * + * The flags orRepeating, and orRepeatingHasNulls track if the whole batch so far + * false and/or null. 
We are only tracking if there have been some nulls here. + */ + boolean orRepeating = false; + boolean orRepeatingHasNulls = false; + int orSel = 0; + Arrays.fill(intermediateNulls, 0, VectorizedRowBatch.DEFAULT_SIZE, false); + + // We set true rows as we find them. + if (batch.selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = 0; + } + } else { + Arrays.fill(outputVector, 0, n, 0); } - long vector1Value = vector1[0]; - long vector2Value = vector2[0]; - if (inputColVector1.noNulls && inputColVector2.noNulls) { - if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) { - // All must be selected otherwise size would be zero - // Repeating property will not change. - outV.isRepeating = true; - outputVector[0] = vector1[0] | vector2[0]; - } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) { - if (batch.selectedInUse) { - for (int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector1Value | vector2[i]; - } - } else { - for (int i = 0; i != n; i++) { - outputVector[i] = vector1Value | vector2[i]; - } + outV.reset(); + + LongColumnVector firstColVector = (LongColumnVector) batch.cols[colNums[0]]; + long[] firstVector = firstColVector.vector; + + if (firstColVector.isRepeating) { + if (firstColVector.noNulls || !firstColVector.isNull[0]) { + if (firstVector[0] == 1) { + outV.isRepeating = true; + outputVector[0] = 1; + return; } - outV.isRepeating = false; - } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) { - if (batch.selectedInUse) { - for (int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector1[i] | vector2Value; - } - } else { - for (int i = 0; i != n; i++) { - outputVector[i] = vector1[i] | vector2Value; + } else { + Preconditions.checkState(firstColVector.isNull[0]); + + // This stage null still could have influence even if orRepeating is also a null. 
+ orRepeatingHasNulls = true; + } + orRepeating = true; + } else if (firstColVector.noNulls) { + if (batch.selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + if (firstVector[i] == 0) { + orSelected[orSel++] = i; + } else { + outputVector[i] = 1; } } - outV.isRepeating = false; - } else /* neither side is repeating */{ - if (batch.selectedInUse) { - for (int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector1[i] | vector2[i]; - } - } else { - for (int i = 0; i != n; i++) { - outputVector[i] = vector1[i] | vector2[i]; + } else { + for (int i = 0; i != n; i++) { + if (firstVector[i] == 0) { + orSelected[orSel++] = i; + } else { + outputVector[i] = 1; } } - outV.isRepeating = false; } - outV.noNulls = true; - } else if (inputColVector1.noNulls && !inputColVector2.noNulls) { - // only input 2 side has nulls - if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) { - // All must be selected otherwise size would be zero - // Repeating property will not change. 
- outV.isRepeating = true; - outputVector[0] = vector1[0] | vector2[0]; - outV.isNull[0] = (vector1[0] == 0) && inputColVector2.isNull[0]; - } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) { - if (batch.selectedInUse) { - for (int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector1Value | vector2[i]; - outV.isNull[i] = (vector1Value == 0) && inputColVector2.isNull[i]; - } - } else { - for (int i = 0; i != n; i++) { - outputVector[i] = vector1Value | vector2[i]; - outV.isNull[i] = (vector1Value == 0) && inputColVector2.isNull[i]; + } else { + if (batch.selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + if (firstColVector.isNull[i]) { + intermediateNulls[i] = true; + orSelected[orSel++] = i; + } else if (firstVector[i] == 0) { + orSelected[orSel++] = i; + } else { + outputVector[i] = 1; } } - outV.isRepeating = false; - } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) { - if (batch.selectedInUse) { - for (int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector1[i] | vector2Value; - outV.isNull[i] = (vector1[i] == 0) && inputColVector2.isNull[0]; - } - } else { - for (int i = 0; i != n; i++) { - outputVector[i] = vector1[i] | vector2Value; - outV.isNull[i] = (vector1[i] == 0) && inputColVector2.isNull[0]; + } else { + for (int i = 0; i != n; i++) { + if (firstColVector.isNull[i]) { + intermediateNulls[i] = true; + orSelected[orSel++] = i; + } else if (firstVector[i] == 0) { + orSelected[orSel++] = i; + } else { + outputVector[i] = 1; } } - outV.isRepeating = false; - } else /* neither side is repeating */{ - if (batch.selectedInUse) { - for (int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector1[i] | vector2[i]; - outV.isNull[i] = (vector1[i] == 0) && inputColVector2.isNull[i]; - } - } else { - for (int i = 0; i != n; i++) { - outputVector[i] = vector1[i] | vector2[i]; - outV.isNull[i] = (vector1[i] == 0) && inputColVector2.isNull[i]; - } - } - 
outV.isRepeating = false; } - outV.noNulls = false; - } else if (!inputColVector1.noNulls && inputColVector2.noNulls) { - // only input 1 side has nulls - if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) { - // All must be selected otherwise size would be zero - // Repeating property will not change. - outV.isRepeating = true; - outputVector[0] = vector1[0] | vector2[0]; - outV.isNull[0] = inputColVector1.isNull[0] && (vector2[0] == 0); - } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) { - if (batch.selectedInUse) { - for (int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector1Value | vector2[i]; - outV.isNull[i] = inputColVector1.isNull[0] && (vector2[i] == 0); - } - } else { - for (int i = 0; i != n; i++) { - outputVector[i] = vector1Value | vector2[i]; - outV.isNull[i] = inputColVector1.isNull[0] && (vector2[i] == 0); + } + + // Process input #2 and above. + + int colNum = 1; + do { + if (!orRepeating && orSel == 0) { + + // Everything is true (how philosophical!). + break; + } + + if (childExpressions != null) { + int childExpressionIndex = mapToChildExpression[colNum]; + if (childExpressionIndex != -1) { + if (orRepeating) { + // We need to start with a full evaluate. + VectorExpression ve = childExpressions[childExpressionIndex]; + Preconditions.checkState(ve.getOutputColumn() == colNums[colNum]); + ve.evaluate(batch); + } else { + // Evaluate next child expression. + // But only orSelected (current false or false with nulls rows). 
+ boolean saveSelectedInUse = batch.selectedInUse; + int[] saveSelected = sel; + batch.selectedInUse = true; + batch.selected = orSelected; + + VectorExpression ve = childExpressions[childExpressionIndex]; + Preconditions.checkState(ve.getOutputColumn() == colNums[colNum]); + ve.evaluate(batch); + + batch.selectedInUse = saveSelectedInUse; + batch.selected = saveSelected; } } - outV.isRepeating = false; - } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) { - if (batch.selectedInUse) { - for (int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector1[i] | vector2Value; - outV.isNull[i] = inputColVector1.isNull[i] && (vector2Value == 0); - } - } else { - for (int i = 0; i != n; i++) { - outputVector[i] = vector1[i] | vector2Value; - outV.isNull[i] = inputColVector1.isNull[i] && (vector2Value == 0); + } + + LongColumnVector nextColVector = (LongColumnVector) batch.cols[colNums[colNum]]; + long[] nextVector = nextColVector.vector; + + if (orRepeating) { + + // The orRepeating flag means the whole batch is false or nulls. + + if (nextColVector.isRepeating) { + if (nextColVector.noNulls || !nextColVector.isNull[0]) { + if (nextVector[0] == 1) { + outV.isRepeating = true; + outputVector[0] = 1; + return; + } else { + // The orRepeatingHasNulls flag goes on to the next stage, too. + } + } else { + Preconditions.checkState(nextColVector.isNull[0]); + + // This stage null still could have influence even if orRepeating is also a null. + orRepeatingHasNulls = true; } - } - outV.isRepeating = false; - } else /* neither side is repeating */{ - if (batch.selectedInUse) { - for (int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector1[i] | vector2[i]; - outV.isNull[i] = inputColVector1.isNull[i] && (vector2[i] == 0); + // Continue as orRepeating + } else if (nextColVector.noNulls) { + + // Switch away from orRepeating. 
+ Preconditions.checkState(orSel == 0); + orRepeating = false; + + if (orRepeatingHasNulls) { + + // We only set intermediateNulls when the next row is false. + // The orSelected array will represent rows with false values and at least one null. + // If all future rounds are false, then the null will cause the result to be null. + if (batch.selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + if (nextVector[i] == 0) { + intermediateNulls[i] = true; + orSelected[orSel++] = i; + } else { + outputVector[i] = 1; + } + } + } else { + for (int i = 0; i != n; i++) { + if (nextVector[i] == 0) { + intermediateNulls[i] = true; + orSelected[orSel++] = i; + } else { + outputVector[i] = 1; + } + } + } + orRepeatingHasNulls = false; + } else { + + // Previous rounds were all false. + if (batch.selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + if (nextVector[i] == 0) { + orSelected[orSel++] = i; + } else { + outputVector[i] = 1; + } + } + } else { + for (int i = 0; i != n; i++) { + if (nextVector[i] == 0) { + orSelected[orSel++] = i; + } else { + outputVector[i] = 1; + } + } + } } } else { - for (int i = 0; i != n; i++) { - outputVector[i] = vector1[i] | vector2[i]; - outV.isNull[i] = inputColVector1.isNull[i] && (vector2[i] == 0); + + // Can have nulls. + + // Switch away from orRepeating. + Preconditions.checkState(orSel == 0); + orRepeating = false; + + if (orRepeatingHasNulls) { + + // We only set intermediateNulls when the next row is false. + // The orSelected array will represent rows with false values and at least one null. + // If all future rounds are false, then the null will cause the result to be null. 
+ if (batch.selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + if (nextColVector.isNull[i] || nextVector[i] == 0) { + intermediateNulls[i] = true; + orSelected[orSel++] = i; + } else { + outputVector[i] = 1; + } + } + } else { + for (int i = 0; i != n; i++) { + if (nextColVector.isNull[i] || nextVector[i] == 0) { + intermediateNulls[i] = true; + orSelected[orSel++] = i; + } else { + outputVector[i] = 1; + } + } + } + orRepeatingHasNulls = false; + } else { + + // Previous rounds were all false. + if (batch.selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + if (nextColVector.isNull[i]) { + intermediateNulls[i] = true; + orSelected[orSel++] = i; + } else if (nextVector[i] == 0) { + orSelected[orSel++] = i; + } else { + outputVector[i] = 1; + } + } + } else { + for (int i = 0; i != n; i++) { + if (nextColVector.isNull[i]) { + intermediateNulls[i] = true; + orSelected[orSel++] = i; + } else if (nextVector[i] == 0) { + orSelected[orSel++] = i; + } else { + outputVector[i] = 1; + } + } + } } } - outV.isRepeating = false; - } - outV.noNulls = false; - } else /* !inputColVector1.noNulls && !inputColVector2.noNulls */{ - // either input 1 or input 2 may have nulls - if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) { - // All must be selected otherwise size would be zero - // Repeating property will not change. 
- outV.isRepeating = true; - outputVector[0] = vector1[0] | vector2[0]; - outV.isNull[0] = ((vector1[0] == 0) && inputColVector2.isNull[0]) - || (inputColVector1.isNull[0] && (vector2[0] == 0)) - || (inputColVector1.isNull[0] && inputColVector2.isNull[0]); - } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) { - if (batch.selectedInUse) { - for (int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector1Value | vector2[i]; - outV.isNull[i] = ((vector1[0] == 0) && inputColVector2.isNull[i]) - || (inputColVector1.isNull[0] && (vector2[i] == 0)) - || (inputColVector1.isNull[0] && inputColVector2.isNull[i]); - } - } else { - for (int i = 0; i != n; i++) { - outputVector[i] = vector1Value | vector2[i]; - outV.isNull[i] = ((vector1[0] == 0) && inputColVector2.isNull[i]) - || (inputColVector1.isNull[0] && (vector2[i] == 0)) - || (inputColVector1.isNull[0] && inputColVector2.isNull[i]); + } else { + + // The outputVector array contains the rows that are currently true and the rows + // are false and/or have null rows are represented in the intermediateNulls and + // orSelected arrays. + + if (nextColVector.isRepeating) { + if (nextColVector.noNulls) { + if (nextVector[0] == 1) { + outV.isRepeating = true; + outputVector[0] = 1; + return; + } + } else if (nextColVector.isNull[0]) { + + // Repeating null has influence over current orSelected entries. + for (int j = 0; j < orSel; j++) { + int i = orSelected[j]; + intermediateNulls[i] = true; + } } - } - outV.isRepeating = false; - } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) { - if (batch.selectedInUse) { - for (int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector1[i] | vector2Value; - outV.isNull[i] = ((vector1[i] == 0) && inputColVector2.isNull[0]) - || (inputColVector1.isNull[i] && (vector2[0] == 0)) - || (inputColVector1.isNull[i] && inputColVector2.isNull[0]); + } else if (nextColVector.noNulls) { + + // Set true entries in output vector. 
+ int newSel = 0; + for (int j = 0; j < orSel; j++) { + int i = orSelected[j]; + if (nextVector[i] == 0) { + orSelected[newSel++] = i; + } else { + outputVector[i] = 1; + } } + orSel = newSel; } else { - for (int i = 0; i != n; i++) { - outputVector[i] = vector1[i] | vector2Value; - outV.isNull[i] = ((vector1[i] == 0) && inputColVector2.isNull[0]) - || (inputColVector1.isNull[i] && (vector2[0] == 0)) - || (inputColVector1.isNull[i] && inputColVector2.isNull[0]); + + // Can have nulls. + + // New round sets true rows in the output vector and notes nulls. + int newSel = 0; + for (int j = 0; j < orSel; j++) { + int i = orSelected[j]; + if (nextColVector.isNull[i]) { + // Keep the entry: it was false and/or null so far and now has at least one null. + intermediateNulls[i] = true; + orSelected[newSel++] = i; + } else if (nextVector[i] == 0) { + // Keep the entry since this column is false. It may have nulls from earlier rounds. + orSelected[newSel++] = i; + } else { + outputVector[i] = 1; + } } + orSel = newSel; } - outV.isRepeating = false; - } else /* neither side is repeating */{ - if (batch.selectedInUse) { - for (int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector1[i] | vector2[i]; - outV.isNull[i] = ((vector1[i] == 0) && inputColVector2.isNull[i]) - || (inputColVector1.isNull[i] && (vector2[i] == 0)) - || (inputColVector1.isNull[i] && inputColVector2.isNull[i]); - } - } else { - for (int i = 0; i != n; i++) { - outputVector[i] = vector1[i] | vector2[i]; - outV.isNull[i] = ((vector1[i] == 0) && inputColVector2.isNull[i]) - || (inputColVector1.isNull[i] && (vector2[i] == 0)) - || (inputColVector1.isNull[i] && inputColVector2.isNull[i]); - } + } + } while (++colNum < colNums.length); + + // Produce final result. + + if (orRepeating) { + + // The orRepeating flag means the whole batch is false and may have nulls. 
+ outV.isRepeating = true; + if (orRepeatingHasNulls) { + outV.noNulls = false; + outV.isNull[0] = true; + } else { + outputVector[0] = 0; + } + } else { + + // When there is a false entry and there were intermediate nulls, the result row will be null. + // Note the true entries were already set in outputVector as we processed the rounds above. + for (int j = 0; j < orSel; j++) { + int i = orSelected[j]; + Preconditions.checkState(outputVector[i] == 0); + if (intermediateNulls[i]) { + outV.noNulls = false; + outV.isNull[i] = true; } - outV.isRepeating = false; } - outV.noNulls = false; } } @@ -296,22 +461,6 @@ public String getOutputType() { return "boolean"; } - public int getColNum1() { - return colNum1; - } - - public void setColNum1(int colNum1) { - this.colNum1 = colNum1; - } - - public int getColNum2() { - return colNum2; - } - - public void setColNum2(int colNum2) { - this.colNum2 = colNum2; - } - public void setOutputColumn(int outputColumn) { this.outputColumn = outputColumn; } diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java index e4c7529..289c66d 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java @@ -627,8 +627,6 @@ public void testVectorizeAndOrProjectionExpression() throws HiveException { assertEquals(veAnd.getClass(), ColAndCol.class); assertEquals(1, veAnd.getChildExpressions().length); assertEquals(veAnd.getChildExpressions()[0].getClass(), LongColGreaterLongScalar.class); - assertEquals(2, ((ColAndCol) veAnd).getColNum1()); - assertEquals(1, ((ColAndCol) veAnd).getColNum2()); assertEquals(3, ((ColAndCol) veAnd).getOutputColumn()); //OR @@ -652,8 +650,6 @@ public void testVectorizeAndOrProjectionExpression() throws HiveException { assertEquals(veOr.getClass(), ColOrCol.class); assertEquals(1, 
veAnd.getChildExpressions().length); assertEquals(veAnd.getChildExpressions()[0].getClass(), LongColGreaterLongScalar.class); - assertEquals(2, ((ColOrCol) veOr).getColNum1()); - assertEquals(1, ((ColOrCol) veOr).getColNum2()); assertEquals(3, ((ColOrCol) veOr).getOutputColumn()); } diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java index 7d54a9c..16c4198 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java @@ -40,7 +40,7 @@ @Test public void testLongColOrLongCol() { VectorizedRowBatch batch = getBatchThreeBooleanCols(); - ColOrCol expr = new ColOrCol(0, 1, 2); + ColOrCol expr = new ColOrCol(new int[] {0, 1}, 2); LongColumnVector outCol = (LongColumnVector) batch.cols[2]; expr.evaluate(batch); // verify @@ -96,7 +96,7 @@ public void testLongColOrLongCol() { @Test public void testLongColAndLongCol() { VectorizedRowBatch batch = getBatchThreeBooleanCols(); - ColAndCol expr = new ColAndCol(0, 1, 2); + ColAndCol expr = new ColAndCol(new int[] {0, 1}, 2); LongColumnVector outCol = (LongColumnVector) batch.cols[2]; expr.evaluate(batch); diff --git ql/src/test/queries/clientpositive/vector_multi_and_projection.q ql/src/test/queries/clientpositive/vector_multi_and_projection.q new file mode 100644 index 0000000..56164de --- /dev/null +++ ql/src/test/queries/clientpositive/vector_multi_and_projection.q @@ -0,0 +1,216 @@ +set hive.cli.print.header=true; +set hive.explain.user=false; +SET hive.auto.convert.join=true; +set hive.fetch.task.conversion=none; +set hive.mapred.mode=nonstrict; + +-- SORT_QUERY_RESULTS + +create table vectortab2k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts 
timestamp, + ts2 timestamp, + dt date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/vectortab2k' OVERWRITE INTO TABLE vectortab2k; + +CREATE TABLE scratch AS SELECT t, si, i, b, bo FROM vectortab2k; +INSERT INTO TABLE scratch VALUES (NULL, NULL, NULL, NULL, NULL); + +CREATE TABLE vectortab2k_orc STORED AS ORC AS SELECT * FROM scratch; + +SET hive.vectorized.execution.enabled=true; + +EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc + order by t, si, i) as q; + +SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc + order by t, si, i) as q; + +EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, i, b, (t < 0 AND si > 0 AND i < 0 AND b > 0) as multi_and_col from vectortab2k_orc + order by t, si, i, b) as q; + +SELECT sum(hash(*)) FROM + (SELECT t, si, i, b, (t < 0 AND si > 0 AND i < 0 AND b > 0) as multi_and_col from vectortab2k_orc + order by t, si, i, b) as q; + +-- Use a boolean column rather than a column comparison expression. 
+EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, bo, b, (t < 0 AND si > 0 AND bo AND b > 0) as multi_and_col from vectortab2k_orc + order by t, si, bo, b) as q; + +SELECT sum(hash(*)) FROM + (SELECT t, si, bo, b, (t < 0 AND si > 0 AND bo AND b > 0) as multi_and_col from vectortab2k_orc + order by t, si, bo, b) as q; + +-- With some filtering +SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc + where pmod(t, 4) > 1 + order by t, si, i) as q; +SELECT sum(hash(*)) FROM + (SELECT t, si, i, b, (t < 0 AND si > 0 AND i < 0 AND b > 0) as multi_and_col from vectortab2k_orc + where pmod(t, 4) < 2 + order by t, si, i, b) as q; +SELECT sum(hash(*)) FROM + (SELECT t, si, bo, b, (t < 0 AND si > 0 AND bo AND b > 0) as multi_and_col from vectortab2k_orc + where pmod(b, 8) == 7 + order by t, si, bo, b) as q; + +SET hive.vectorized.execution.enabled=false; + +CREATE TABLE scratch_repeat AS SELECT t, si, i, b, bo, 20 as t_repeat, + 9000 as si_repeat, 9233320 as i_repeat, -823823999339992 as b_repeat, false as bo_repeat_false, true as bo_repeat_true FROM vectortab2k; + +-- The repeated columns ought to create repeated VectorizedRowBatch for those columns. +-- And then when we do a comparison, we should generate a repeated boolean result. +CREATE TABLE vectortab2k_orc_repeat STORED AS ORC AS SELECT * FROM scratch_repeat; + +SET hive.vectorized.execution.enabled=true; + +-- t_repeat > 0 should generate all true. +SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, i, (t_repeat > 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc_repeat + order by t_repeat, si, i) as q; + +-- t_repeat < 0 should generate all false. +SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, i, (t_repeat < 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc_repeat + order by t_repeat, si, i) as q; + +-- Two repeated false columns at beginning... 
+SELECT sum(hash(*)) FROM + (SELECT t_repeat, si_repeat, i, (t_repeat > 0 AND si_repeat > 0 AND i < 0) as multi_and_col from vectortab2k_orc_repeat + order by t_repeat, si_repeat, i) as q; + +-- si_repeat > 0 should generate all true. +SELECT sum(hash(*)) FROM + (SELECT t, si_repeat, i, b_repeat, (t < 0 AND si_repeat > 0 AND i < 0 AND b_repeat > 0) as multi_and_col from vectortab2k_orc_repeat + order by t, si_repeat, i, b_repeat) as q; + +-- si_repeat < 0 should generate all false. +SELECT sum(hash(*)) FROM + (SELECT t, si_repeat, i, b_repeat, (t < 0 AND si_repeat < 0 AND i < 0 AND b_repeat > 0) as multi_and_col from vectortab2k_orc_repeat + order by t, si_repeat, i, b_repeat) as q; + +-- Use a boolean column rather than a column comparison expression. +SELECT sum(hash(*)) FROM + (SELECT t, si, bo_repeat_false, b, (t < 0 AND si > 0 AND bo_repeat_false AND b > 0) as multi_and_col from vectortab2k_orc_repeat + order by t, si, bo_repeat_false, b) as q; + +SELECT sum(hash(*)) FROM + (SELECT t, si, bo_repeat_true, b, (t < 0 AND si > 0 AND bo_repeat_true AND b > 0) as multi_and_col from vectortab2k_orc_repeat + order by t, si, bo_repeat_true, b) as q; + +-- With some filtering +SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, i, (t_repeat > 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc_repeat + where pmod(si, 4) = 0 + order by t_repeat, si, i) as q; +SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, i, (t_repeat < 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc_repeat + where pmod(si, 4) = 3 + order by t_repeat, si, i) as q; +SELECT sum(hash(*)) FROM + (SELECT t_repeat, si_repeat, i, (t_repeat > 0 AND si_repeat > 0 AND i < 0) as multi_and_col from vectortab2k_orc_repeat + where pmod(si, 4) != 3 + order by t_repeat, si_repeat, i) as q; +SELECT sum(hash(*)) FROM + (SELECT t, si_repeat, i, b_repeat, (t < 0 AND si_repeat > 0 AND i < 0 AND b_repeat > 0) as multi_and_col from vectortab2k_orc_repeat + where pmod(si, 4) < 2 + order by t, si_repeat, 
i, b_repeat) as q; +SELECT sum(hash(*)) FROM + (SELECT t, si_repeat, i, b_repeat, (t < 0 AND si_repeat < 0 AND i < 0 AND b_repeat > 0) as multi_and_col from vectortab2k_orc_repeat + where pmod(t, 4) = 0 + order by t, si_repeat, i, b_repeat) as q; +SELECT sum(hash(*)) FROM + (SELECT t, si, bo_repeat_false, b, (t < 0 AND si > 0 AND bo_repeat_false AND b > 0) as multi_and_col from vectortab2k_orc_repeat + where pmod(b, 4) > 1 + order by t, si, bo_repeat_false, b) as q; +SELECT sum(hash(*)) FROM + (SELECT t, si, bo_repeat_true, b, (t < 0 AND si > 0 AND bo_repeat_true AND b > 0) as multi_and_col from vectortab2k_orc_repeat + where pmod(si, 4) < 3 + order by t, si, bo_repeat_true, b) as q; + + +SET hive.vectorized.execution.enabled=false; + +CREATE TABLE scratch_null AS SELECT t, si, i, b, bo, + cast(null as tinyint) as t_null, cast(null as smallint) as si_null, cast(null as int) as i_null, cast(null as bigint) as b_null, cast(null as boolean) as bo_null FROM vectortab2k; + +-- The nulled columns ought to create repeated null VectorizedRowBatch for those columns. 
+CREATE TABLE vectortab2k_orc_null STORED AS ORC AS SELECT * FROM scratch_null; + +SET hive.vectorized.execution.enabled=true; + +EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null is null AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si, i) as q; + +SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null is null AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si, i) as q; + +EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null < 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si, i) as q; + +SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null < 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si, i) as q; + +EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i, (t_null < 0 AND si_null > 0 AND i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si_null, i) as q; + +SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i, (t_null < 0 AND si_null > 0 AND i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si_null, i) as q; + +EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i_null, (t_null < 0 AND si_null > 0 AND i_null < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si_null, i_null) as q; + +SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i_null, (t_null < 0 AND si_null > 0 AND i_null < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si_null, i_null) as q; + +-- With some filtering +SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null is null AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc_null + where pmod(i,4) = 3 + order by t_null, si, i) as q; +SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null < 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc_null + where pmod(i,4) = 2 + order by t_null, si, i) as q; +SELECT sum(hash(*)) FROM + (SELECT t_null, 
si_null, i, (t_null < 0 AND si_null > 0 AND i < 0) as multi_and_col from vectortab2k_orc_null + where pmod(i,4) != 3 + order by t_null, si_null, i) as q; +SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i, (t_null < 0 AND si_null > 0 AND i < 0) as multi_and_col from vectortab2k_orc_null + where pmod(i,4) < 3 + order by t_null, si_null, i) as q; + diff --git ql/src/test/queries/clientpositive/vector_multi_or_projection.q ql/src/test/queries/clientpositive/vector_multi_or_projection.q new file mode 100644 index 0000000..e6cfe2d --- /dev/null +++ ql/src/test/queries/clientpositive/vector_multi_or_projection.q @@ -0,0 +1,218 @@ +set hive.cli.print.header=true; +set hive.explain.user=false; +SET hive.auto.convert.join=true; +set hive.fetch.task.conversion=none; +set hive.mapred.mode=nonstrict; + +-- SORT_QUERY_RESULTS + +create table vectortab2k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/vectortab2k' OVERWRITE INTO TABLE vectortab2k; + +CREATE TABLE scratch AS SELECT t, si, i, b, bo FROM vectortab2k; +INSERT INTO TABLE scratch VALUES (NULL, NULL, NULL, NULL, NULL); + +CREATE TABLE vectortab2k_orc STORED AS ORC AS SELECT * FROM scratch; + +SET hive.vectorized.execution.enabled=true; + +EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 OR si > 0 OR i < 0) as multi_or_col from vectortab2k_orc + order by t, si, i) as q; + +SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 OR si > 0 OR i < 0) as multi_or_col from vectortab2k_orc + order by t, si, i) as q; + +EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, i, b, (t < 0 OR si > 0 OR i < 0 OR b > 0) as multi_or_col from vectortab2k_orc + order by t, si, i, b) as q; + +SELECT sum(hash(*)) FROM + (SELECT t, si, i, b, (t < 0 OR si > 0 OR i < 0 OR b > 0) as multi_or_col 
from vectortab2k_orc + order by t, si, i, b) as q; + +-- Use a boolean column rather than a column comparison expression. +EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, bo, b, (t < 0 OR si > 0 OR bo OR b > 0) as multi_or_col from vectortab2k_orc + order by t, si, bo, b) as q; + +SELECT sum(hash(*)) FROM + (SELECT t, si, bo, b, (t < 0 OR si > 0 OR bo OR b > 0) as multi_or_col from vectortab2k_orc + order by t, si, bo, b) as q; + +-- With some filtering +SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 OR si > 0 OR i < 0) as multi_or_col from vectortab2k_orc + where pmod(i,4) = 2 + order by t, si, i) as q; +SELECT sum(hash(*)) FROM + (SELECT t, si, i, b, (t < 0 OR si > 0 OR i < 0 OR b > 0) as multi_or_col from vectortab2k_orc + where pmod(si,4) < 2 + order by t, si, i, b) as q; +SELECT sum(hash(*)) FROM + (SELECT t, si, bo, b, (t < 0 OR si > 0 OR bo OR b > 0) as multi_or_col from vectortab2k_orc + where pmod(i,4) = 2 + order by t, si, bo, b) as q; + + +SET hive.vectorized.execution.enabled=false; + +CREATE TABLE scratch_repeat AS SELECT t, si, i, b, bo, 20 as t_repeat, + 9000 as si_repeat, 9233320 as i_repeat, -823823999339992 as b_repeat, false as bo_repeat_false, true as bo_repeat_true FROM vectortab2k; + +-- The repeated columns ought to create repeated VectorizedRowBatch for those columns. +-- And then when we do a comparison, we should generate a repeated boolean result. +CREATE TABLE vectortab2k_orc_repeat STORED AS ORC AS SELECT * FROM scratch_repeat; + +SET hive.vectorized.execution.enabled=true; + +-- t_repeat < 0 should generate all false. +SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, i, (t_repeat < 0 OR si > 0 OR i < 0) as multi_or_col from vectortab2k_orc_repeat + order by t_repeat, si, i) as q; + +-- t_repeat > 0 should generate all true. 
+SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, i, (t_repeat > 0 OR si > 0 OR i < 0) as multi_or_col from vectortab2k_orc_repeat + order by t_repeat, si, i) as q; + +-- Two repeated false columns at beginning... +SELECT sum(hash(*)) FROM + (SELECT t_repeat, si_repeat, i, (t_repeat < 0 OR si_repeat < 0 OR i < 0) as multi_or_col from vectortab2k_orc_repeat + order by t_repeat, si_repeat, i) as q; + +-- si_repeat > 0 should generate all true. +SELECT sum(hash(*)) FROM + (SELECT t, si_repeat, i, b, (t < 0 OR si_repeat > 0 OR i < 0 OR b > 0) as multi_or_col from vectortab2k_orc_repeat + order by t, si_repeat, i, b) as q; + +-- si_repeat < 0 should generate all false. +SELECT sum(hash(*)) FROM + (SELECT t, si_repeat, i, b, (t < 0 OR si_repeat < 0 OR i < 0 OR b > 0) as multi_or_col from vectortab2k_orc_repeat + order by t, si_repeat, i, b) as q; + +-- Use a boolean column rather than a column comparison expression. +SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, bo_repeat_false, b, (t_repeat > 0 OR si > 0 OR bo_repeat_false OR b > 0) as multi_or_col from vectortab2k_orc_repeat + order by t_repeat, si, bo_repeat_false, b) as q; + +SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, bo_repeat_true, b, (t_repeat > 0 OR si > 0 OR bo_repeat_true OR b > 0) as multi_or_col from vectortab2k_orc_repeat + order by t_repeat, si, bo_repeat_true, b) as q; + +-- With some filtering +SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, i, (t_repeat < 0 OR si > 0 OR i < 0) as multi_or_col from vectortab2k_orc_repeat + where pmod(i,4) > 1 + order by t_repeat, si, i) as q; +SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, i, (t_repeat > 0 OR si > 0 OR i < 0) as multi_or_col from vectortab2k_orc_repeat + where pmod(si,4) > 1 + order by t_repeat, si, i) as q; +SELECT sum(hash(*)) FROM + (SELECT t_repeat, si_repeat, i, (t_repeat < 0 OR si_repeat < 0 OR i < 0) as multi_or_col from vectortab2k_orc_repeat + where pmod(i,4) > 1 + order by t_repeat, si_repeat, i) as q; +SELECT sum(hash(*)) FROM + 
(SELECT t, si_repeat, i, b, (t < 0 OR si_repeat > 0 OR i < 0 OR b > 0) as multi_or_col from vectortab2k_orc_repeat + where pmod(t,4) > 1 + order by t, si_repeat, i, b) as q; +SELECT sum(hash(*)) FROM + (SELECT t, si_repeat, i, b, (t < 0 OR si_repeat < 0 OR i < 0 OR b > 0) as multi_or_col from vectortab2k_orc_repeat + where pmod(b,4) > 1 + order by t, si_repeat, i, b) as q; +SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, bo_repeat_false, b, (t_repeat > 0 OR si > 0 OR bo_repeat_false OR b > 0) as multi_or_col from vectortab2k_orc_repeat + where pmod(si,4) > 1 + order by t_repeat, si, bo_repeat_false, b) as q; +SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, bo_repeat_true, b, (t_repeat > 0 OR si > 0 OR bo_repeat_true OR b > 0) as multi_or_col from vectortab2k_orc_repeat + where pmod(si,4) < 2 + order by t_repeat, si, bo_repeat_true, b) as q; + + +SET hive.vectorized.execution.enabled=false; + +CREATE TABLE scratch_null AS SELECT t, si, i, b, bo, + cast(null as tinyint) as t_null, cast(null as smallint) as si_null, cast(null as int) as i_null, cast(null as bigint) as b_null, cast(null as boolean) as bo_null FROM vectortab2k; + +-- The nulled columns ought to create repeated null VectorizedRowBatch for those columns. 
+CREATE TABLE vectortab2k_orc_null STORED AS ORC AS SELECT * FROM scratch_null; + +SET hive.vectorized.execution.enabled=true; + + +EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null is null OR si > 0 OR i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si, i) as q; + +SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null is null OR si > 0 OR i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si, i) as q; + +EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null < 0 OR si > 0 OR i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si, i) as q; + +SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null < 0 OR si > 0 OR i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si, i) as q; + +EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i, (t_null < 0 OR si_null > 0 OR i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si_null, i) as q; + +SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i, (t_null < 0 OR si_null > 0 OR i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si_null, i) as q; + +EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i_null, (t_null < 0 OR si_null > 0 OR i_null < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si_null, i_null) as q; + +SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i_null, (t_null < 0 OR si_null > 0 OR i_null < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si_null, i_null) as q; + + +-- With some filtering +SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null is null OR si > 0 OR i < 0) as multi_and_col from vectortab2k_orc_null + where pmod(i,4) = 2 + order by t_null, si, i) as q; +SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null < 0 OR si > 0 OR i < 0) as multi_and_col from vectortab2k_orc_null + where pmod(i,4) = 3 + order by t_null, si, i) as q; +SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i, (t_null 
< 0 OR si_null > 0 OR i < 0) as multi_and_col from vectortab2k_orc_null + where pmod(i,4) = 0 + order by t_null, si_null, i) as q; +SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i_null, (t_null < 0 OR si_null > 0 OR i_null < 0) as multi_and_col from vectortab2k_orc_null + where pmod(i,4) != 2 + order by t_null, si_null, i_null) as q; diff --git ql/src/test/results/clientpositive/tez/vector_multi_and_projection.q.out ql/src/test/results/clientpositive/tez/vector_multi_and_projection.q.out new file mode 100644 index 0000000..144b0c1 --- /dev/null +++ ql/src/test/results/clientpositive/tez/vector_multi_and_projection.q.out @@ -0,0 +1,1066 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +create table vectortab2k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@vectortab2k +POSTHOOK: query: -- SORT_QUERY_RESULTS + +create table vectortab2k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vectortab2k +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab2k' OVERWRITE INTO TABLE vectortab2k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@vectortab2k +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab2k' OVERWRITE INTO TABLE vectortab2k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@vectortab2k +PREHOOK: query: CREATE TABLE scratch AS SELECT t, si, i, b, bo FROM vectortab2k +PREHOOK: type: 
CREATETABLE_AS_SELECT +PREHOOK: Input: default@vectortab2k +PREHOOK: Output: database:default +PREHOOK: Output: default@scratch +POSTHOOK: query: CREATE TABLE scratch AS SELECT t, si, i, b, bo FROM vectortab2k +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@vectortab2k +POSTHOOK: Output: database:default +POSTHOOK: Output: default@scratch +POSTHOOK: Lineage: scratch.b SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: scratch.bo SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: scratch.i SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: scratch.si SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: scratch.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] +t si i b bo +PREHOOK: query: INSERT INTO TABLE scratch VALUES (NULL, NULL, NULL, NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@scratch +POSTHOOK: query: INSERT INTO TABLE scratch VALUES (NULL, NULL, NULL, NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@scratch +POSTHOOK: Lineage: scratch.b EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: scratch.bo EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col5, type:string, comment:), ] +POSTHOOK: Lineage: scratch.i EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: scratch.si EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: scratch.t EXPRESSION 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +_col0 _col1 _col2 _col3 _col4 +PREHOOK: query: CREATE TABLE vectortab2k_orc STORED AS ORC AS SELECT * FROM scratch +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@scratch +PREHOOK: Output: database:default +PREHOOK: Output: default@vectortab2k_orc +POSTHOOK: query: CREATE TABLE vectortab2k_orc STORED AS ORC AS SELECT * FROM scratch +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@scratch +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vectortab2k_orc +POSTHOOK: Lineage: vectortab2k_orc.b SIMPLE [(scratch)scratch.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc.bo SIMPLE [(scratch)scratch.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc.i SIMPLE [(scratch)scratch.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc.si SIMPLE [(scratch)scratch.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc.t SIMPLE [(scratch)scratch.FieldSchema(name:t, type:tinyint, comment:null), ] +scratch.t scratch.si scratch.i scratch.b scratch.bo +PREHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc + order by t, si, i) as q +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc + order by t, si, i) as q +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vectortab2k_orc + Statistics: Num rows: 2001 Data size: 45620 Basic stats: 
COMPLETE Column stats: NONE + Select Operator + expressions: t (type: tinyint), si (type: smallint), i (type: int), ((t < 0) and (si > 0) and (i < 0)) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2001 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int) + sort order: +++ + Statistics: Num rows: 2001 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: boolean) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,VALUE._col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2001 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc + order by t, si, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc + order by t, si, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +c0 +22128070258 +PREHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + 
(SELECT t, si, i, b, (t < 0 AND si > 0 AND i < 0 AND b > 0) as multi_and_col from vectortab2k_orc + order by t, si, i, b) as q +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, i, b, (t < 0 AND si > 0 AND i < 0 AND b > 0) as multi_and_col from vectortab2k_orc + order by t, si, i, b) as q +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vectortab2k_orc + Statistics: Num rows: 2001 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), ((t < 0) and (si > 0) and (i < 0) and (b > 0)) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2001 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint) + sort order: ++++ + Statistics: Num rows: 2001 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: boolean) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,KEY.reducesinkkey3,VALUE._col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2001 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, i, b, (t < 0 AND si > 0 AND i < 0 AND b > 0) as multi_and_col from vectortab2k_orc + order by t, si, i, b) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, i, b, (t < 0 AND si > 0 AND i < 0 AND b > 0) as multi_and_col from vectortab2k_orc + order by t, si, i, b) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +c0 +-1983102543 +PREHOOK: query: -- Use a boolean column rather than a column comparison expression. +EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, bo, b, (t < 0 AND si > 0 AND bo AND b > 0) as multi_and_col from vectortab2k_orc + order by t, si, bo, b) as q +PREHOOK: type: QUERY +POSTHOOK: query: -- Use a boolean column rather than a column comparison expression. 
+EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, bo, b, (t < 0 AND si > 0 AND bo AND b > 0) as multi_and_col from vectortab2k_orc + order by t, si, bo, b) as q +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vectortab2k_orc + Statistics: Num rows: 2001 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t (type: tinyint), si (type: smallint), bo (type: boolean), b (type: bigint), ((t < 0) and (si > 0) and bo and (b > 0)) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2001 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: boolean), _col3 (type: bigint) + sort order: ++++ + Statistics: Num rows: 2001 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: boolean) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,KEY.reducesinkkey3,VALUE._col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2001 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: 
Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, bo, b, (t < 0 AND si > 0 AND bo AND b > 0) as multi_and_col from vectortab2k_orc + order by t, si, bo, b) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, bo, b, (t < 0 AND si > 0 AND bo AND b > 0) as multi_and_col from vectortab2k_orc + order by t, si, bo, b) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +c0 +30392092250 +PREHOOK: query: -- With some filtering +SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc + where pmod(t, 4) > 1 + order by t, si, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +POSTHOOK: query: -- With some filtering +SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc + where pmod(t, 4) > 1 + order by t, si, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +c0 +4766599363 +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, i, b, (t < 0 AND si > 0 AND i < 0 AND b > 0) as multi_and_col from vectortab2k_orc + where pmod(t, 4) < 2 + order by t, si, i, b) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, i, b, (t < 0 AND si > 0 AND i < 0 AND b > 0) as multi_and_col from vectortab2k_orc + where pmod(t, 4) < 2 + order by t, si, i, b) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +c0 +29923525611 +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, bo, b, (t < 0 AND si > 0 AND bo AND b > 0) as multi_and_col from vectortab2k_orc 
+ where pmod(b, 8) == 7 + order by t, si, bo, b) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, bo, b, (t < 0 AND si > 0 AND bo AND b > 0) as multi_and_col from vectortab2k_orc + where pmod(b, 8) == 7 + order by t, si, bo, b) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +c0 +1048488580 +PREHOOK: query: CREATE TABLE scratch_repeat AS SELECT t, si, i, b, bo, 20 as t_repeat, + 9000 as si_repeat, 9233320 as i_repeat, -823823999339992 as b_repeat, false as bo_repeat_false, true as bo_repeat_true FROM vectortab2k +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@vectortab2k +PREHOOK: Output: database:default +PREHOOK: Output: default@scratch_repeat +POSTHOOK: query: CREATE TABLE scratch_repeat AS SELECT t, si, i, b, bo, 20 as t_repeat, + 9000 as si_repeat, 9233320 as i_repeat, -823823999339992 as b_repeat, false as bo_repeat_false, true as bo_repeat_true FROM vectortab2k +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@vectortab2k +POSTHOOK: Output: database:default +POSTHOOK: Output: default@scratch_repeat +POSTHOOK: Lineage: scratch_repeat.b SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: scratch_repeat.b_repeat SIMPLE [] +POSTHOOK: Lineage: scratch_repeat.bo SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: scratch_repeat.bo_repeat_false SIMPLE [] +POSTHOOK: Lineage: scratch_repeat.bo_repeat_true SIMPLE [] +POSTHOOK: Lineage: scratch_repeat.i SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: scratch_repeat.i_repeat SIMPLE [] +POSTHOOK: Lineage: scratch_repeat.si SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: scratch_repeat.si_repeat SIMPLE [] 
+POSTHOOK: Lineage: scratch_repeat.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] +POSTHOOK: Lineage: scratch_repeat.t_repeat SIMPLE [] +t si i b bo t_repeat si_repeat i_repeat b_repeat bo_repeat_false bo_repeat_true +PREHOOK: query: -- The repeated columns ought to create repeated VectorizedRowBatch for those columns. +-- And then when we do a comparison, we should generate a repeated boolean result. +CREATE TABLE vectortab2k_orc_repeat STORED AS ORC AS SELECT * FROM scratch_repeat +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@scratch_repeat +PREHOOK: Output: database:default +PREHOOK: Output: default@vectortab2k_orc_repeat +POSTHOOK: query: -- The repeated columns ought to create repeated VectorizedRowBatch for those columns. +-- And then when we do a comparison, we should generate a repeated boolean result. +CREATE TABLE vectortab2k_orc_repeat STORED AS ORC AS SELECT * FROM scratch_repeat +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@scratch_repeat +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vectortab2k_orc_repeat +POSTHOOK: Lineage: vectortab2k_orc_repeat.b SIMPLE [(scratch_repeat)scratch_repeat.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_repeat.b_repeat SIMPLE [(scratch_repeat)scratch_repeat.FieldSchema(name:b_repeat, type:bigint, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_repeat.bo SIMPLE [(scratch_repeat)scratch_repeat.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_repeat.bo_repeat_false SIMPLE [(scratch_repeat)scratch_repeat.FieldSchema(name:bo_repeat_false, type:boolean, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_repeat.bo_repeat_true SIMPLE [(scratch_repeat)scratch_repeat.FieldSchema(name:bo_repeat_true, type:boolean, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_repeat.i SIMPLE [(scratch_repeat)scratch_repeat.FieldSchema(name:i, type:int, comment:null), 
] +POSTHOOK: Lineage: vectortab2k_orc_repeat.i_repeat SIMPLE [(scratch_repeat)scratch_repeat.FieldSchema(name:i_repeat, type:int, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_repeat.si SIMPLE [(scratch_repeat)scratch_repeat.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_repeat.si_repeat SIMPLE [(scratch_repeat)scratch_repeat.FieldSchema(name:si_repeat, type:int, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_repeat.t SIMPLE [(scratch_repeat)scratch_repeat.FieldSchema(name:t, type:tinyint, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_repeat.t_repeat SIMPLE [(scratch_repeat)scratch_repeat.FieldSchema(name:t_repeat, type:int, comment:null), ] +scratch_repeat.t scratch_repeat.si scratch_repeat.i scratch_repeat.b scratch_repeat.bo scratch_repeat.t_repeat scratch_repeat.si_repeat scratch_repeat.i_repeat scratch_repeat.b_repeat scratch_repeat.bo_repeat_false scratch_repeat.bo_repeat_true +PREHOOK: query: -- t_repeat > 0 should generate all true. +SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, i, (t_repeat > 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc_repeat + order by t_repeat, si, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: -- t_repeat > 0 should generate all true. +SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, i, (t_repeat > 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc_repeat + order by t_repeat, si, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +23302550859 +PREHOOK: query: -- t_repeat < 0 should generate all false. 
+SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, i, (t_repeat < 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc_repeat + order by t_repeat, si, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: -- t_repeat < 0 should generate all false. +SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, i, (t_repeat < 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc_repeat + order by t_repeat, si, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +23302550427 +PREHOOK: query: -- Two repeated false columns at beginning... +SELECT sum(hash(*)) FROM + (SELECT t_repeat, si_repeat, i, (t_repeat > 0 AND si_repeat > 0 AND i < 0) as multi_and_col from vectortab2k_orc_repeat + order by t_repeat, si_repeat, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: -- Two repeated false columns at beginning... +SELECT sum(hash(*)) FROM + (SELECT t_repeat, si_repeat, i, (t_repeat > 0 AND si_repeat > 0 AND i < 0) as multi_and_col from vectortab2k_orc_repeat + order by t_repeat, si_repeat, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +15090004243 +PREHOOK: query: -- si_repeat > 0 should generate all true. +SELECT sum(hash(*)) FROM + (SELECT t, si_repeat, i, b_repeat, (t < 0 AND si_repeat > 0 AND i < 0 AND b_repeat > 0) as multi_and_col from vectortab2k_orc_repeat + order by t, si_repeat, i, b_repeat) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: -- si_repeat > 0 should generate all true. 
+SELECT sum(hash(*)) FROM + (SELECT t, si_repeat, i, b_repeat, (t < 0 AND si_repeat > 0 AND i < 0 AND b_repeat > 0) as multi_and_col from vectortab2k_orc_repeat + order by t, si_repeat, i, b_repeat) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +-4908324365 +PREHOOK: query: -- si_repeat < 0 should generate all false. +SELECT sum(hash(*)) FROM + (SELECT t, si_repeat, i, b_repeat, (t < 0 AND si_repeat < 0 AND i < 0 AND b_repeat > 0) as multi_and_col from vectortab2k_orc_repeat + order by t, si_repeat, i, b_repeat) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: -- si_repeat < 0 should generate all false. +SELECT sum(hash(*)) FROM + (SELECT t, si_repeat, i, b_repeat, (t < 0 AND si_repeat < 0 AND i < 0 AND b_repeat > 0) as multi_and_col from vectortab2k_orc_repeat + order by t, si_repeat, i, b_repeat) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +-4908324365 +PREHOOK: query: -- Use a boolean column rather than a column comparison expression. +SELECT sum(hash(*)) FROM + (SELECT t, si, bo_repeat_false, b, (t < 0 AND si > 0 AND bo_repeat_false AND b > 0) as multi_and_col from vectortab2k_orc_repeat + order by t, si, bo_repeat_false, b) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: -- Use a boolean column rather than a column comparison expression. 
+SELECT sum(hash(*)) FROM + (SELECT t, si, bo_repeat_false, b, (t < 0 AND si > 0 AND bo_repeat_false AND b > 0) as multi_and_col from vectortab2k_orc_repeat + order by t, si, bo_repeat_false, b) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +30391190705 +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, bo_repeat_true, b, (t < 0 AND si > 0 AND bo_repeat_true AND b > 0) as multi_and_col from vectortab2k_orc_repeat + order by t, si, bo_repeat_true, b) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, bo_repeat_true, b, (t < 0 AND si > 0 AND bo_repeat_true AND b > 0) as multi_and_col from vectortab2k_orc_repeat + order by t, si, bo_repeat_true, b) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +30393113000 +PREHOOK: query: -- With some filtering +SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, i, (t_repeat > 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc_repeat + where pmod(si, 4) = 0 + order by t_repeat, si, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: -- With some filtering +SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, i, (t_repeat > 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc_repeat + where pmod(si, 4) = 0 + order by t_repeat, si, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +43680228 +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, i, (t_repeat < 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc_repeat + where pmod(si, 4) = 3 + order by t_repeat, si, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### 
+POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, i, (t_repeat < 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc_repeat + where pmod(si, 4) = 3 + order by t_repeat, si, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +7322792767 +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_repeat, si_repeat, i, (t_repeat > 0 AND si_repeat > 0 AND i < 0) as multi_and_col from vectortab2k_orc_repeat + where pmod(si, 4) != 3 + order by t_repeat, si_repeat, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_repeat, si_repeat, i, (t_repeat > 0 AND si_repeat > 0 AND i < 0) as multi_and_col from vectortab2k_orc_repeat + where pmod(si, 4) != 3 + order by t_repeat, si_repeat, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +2570758619 +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si_repeat, i, b_repeat, (t < 0 AND si_repeat > 0 AND i < 0 AND b_repeat > 0) as multi_and_col from vectortab2k_orc_repeat + where pmod(si, 4) < 2 + order by t, si_repeat, i, b_repeat) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si_repeat, i, b_repeat, (t < 0 AND si_repeat > 0 AND i < 0 AND b_repeat > 0) as multi_and_col from vectortab2k_orc_repeat + where pmod(si, 4) < 2 + order by t, si_repeat, i, b_repeat) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +37376898668 +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si_repeat, i, b_repeat, (t < 0 AND si_repeat < 0 AND i < 0 AND b_repeat > 0) as multi_and_col from vectortab2k_orc_repeat + where pmod(t, 4) = 0 + order by t, si_repeat, i, b_repeat) as q +PREHOOK: 
type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si_repeat, i, b_repeat, (t < 0 AND si_repeat < 0 AND i < 0 AND b_repeat > 0) as multi_and_col from vectortab2k_orc_repeat + where pmod(t, 4) = 0 + order by t, si_repeat, i, b_repeat) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +16927876919 +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, bo_repeat_false, b, (t < 0 AND si > 0 AND bo_repeat_false AND b > 0) as multi_and_col from vectortab2k_orc_repeat + where pmod(b, 4) > 1 + order by t, si, bo_repeat_false, b) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, bo_repeat_false, b, (t < 0 AND si > 0 AND bo_repeat_false AND b > 0) as multi_and_col from vectortab2k_orc_repeat + where pmod(b, 4) > 1 + order by t, si, bo_repeat_false, b) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +-12904162858 +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, bo_repeat_true, b, (t < 0 AND si > 0 AND bo_repeat_true AND b > 0) as multi_and_col from vectortab2k_orc_repeat + where pmod(si, 4) < 3 + order by t, si, bo_repeat_true, b) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, bo_repeat_true, b, (t < 0 AND si > 0 AND bo_repeat_true AND b > 0) as multi_and_col from vectortab2k_orc_repeat + where pmod(si, 4) < 3 + order by t, si, bo_repeat_true, b) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +12129190878 +PREHOOK: query: CREATE TABLE scratch_null AS SELECT t, si, i, b, bo, + cast(null as tinyint) as t_null, 
cast(null as smallint) as si_null, cast(null as int) as i_null, cast(null as bigint) as b_null, cast(null as boolean) as bo_null FROM vectortab2k +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@vectortab2k +PREHOOK: Output: database:default +PREHOOK: Output: default@scratch_null +POSTHOOK: query: CREATE TABLE scratch_null AS SELECT t, si, i, b, bo, + cast(null as tinyint) as t_null, cast(null as smallint) as si_null, cast(null as int) as i_null, cast(null as bigint) as b_null, cast(null as boolean) as bo_null FROM vectortab2k +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@vectortab2k +POSTHOOK: Output: database:default +POSTHOOK: Output: default@scratch_null +POSTHOOK: Lineage: scratch_null.b SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: scratch_null.b_null EXPRESSION [] +POSTHOOK: Lineage: scratch_null.bo SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: scratch_null.bo_null EXPRESSION [] +POSTHOOK: Lineage: scratch_null.i SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: scratch_null.i_null EXPRESSION [] +POSTHOOK: Lineage: scratch_null.si SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: scratch_null.si_null EXPRESSION [] +POSTHOOK: Lineage: scratch_null.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] +POSTHOOK: Lineage: scratch_null.t_null EXPRESSION [] +t si i b bo t_null si_null i_null b_null bo_null +PREHOOK: query: -- The nulled columns ought to create repeated null VectorizedRowBatch for those columns. 
+CREATE TABLE vectortab2k_orc_null STORED AS ORC AS SELECT * FROM scratch_null +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@scratch_null +PREHOOK: Output: database:default +PREHOOK: Output: default@vectortab2k_orc_null +POSTHOOK: query: -- The nulled columns ought to create repeated null VectorizedRowBatch for those columns. +CREATE TABLE vectortab2k_orc_null STORED AS ORC AS SELECT * FROM scratch_null +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@scratch_null +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vectortab2k_orc_null +POSTHOOK: Lineage: vectortab2k_orc_null.b SIMPLE [(scratch_null)scratch_null.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_null.b_null SIMPLE [(scratch_null)scratch_null.FieldSchema(name:b_null, type:bigint, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_null.bo SIMPLE [(scratch_null)scratch_null.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_null.bo_null SIMPLE [(scratch_null)scratch_null.FieldSchema(name:bo_null, type:boolean, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_null.i SIMPLE [(scratch_null)scratch_null.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_null.i_null SIMPLE [(scratch_null)scratch_null.FieldSchema(name:i_null, type:int, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_null.si SIMPLE [(scratch_null)scratch_null.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_null.si_null SIMPLE [(scratch_null)scratch_null.FieldSchema(name:si_null, type:smallint, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_null.t SIMPLE [(scratch_null)scratch_null.FieldSchema(name:t, type:tinyint, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_null.t_null SIMPLE [(scratch_null)scratch_null.FieldSchema(name:t_null, type:tinyint, comment:null), ] +scratch_null.t scratch_null.si scratch_null.i scratch_null.b 
scratch_null.bo scratch_null.t_null scratch_null.si_null scratch_null.i_null scratch_null.b_null scratch_null.bo_null +PREHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null is null AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si, i) as q +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null is null AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si, i) as q +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vectortab2k_orc_null + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t_null (type: tinyint), si (type: smallint), i (type: int), (t_null is null and (si > 0) and (i < 0)) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int) + sort order: +++ + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: boolean) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,VALUE._col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + 
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null is null AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null is null AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +c0 +26405878155 +PREHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null < 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si, i) as q +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null < 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si, i) as q +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vectortab2k_orc_null + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t_null (type: tinyint), si (type: smallint), i (type: int), ((t_null < 0) and (si > 0) and (i < 0)) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: 
Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int) + sort order: +++ + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: boolean) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,VALUE._col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null < 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null < 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +c0 +26405877723 +PREHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i, (t_null < 0 AND si_null > 0 AND i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si_null, i) as q +PREHOOK: type: QUERY +POSTHOOK: 
query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i, (t_null < 0 AND si_null > 0 AND i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si_null, i) as q +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vectortab2k_orc_null + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t_null (type: tinyint), si_null (type: smallint), i (type: int), ((t_null < 0) and (si_null > 0) and (i < 0)) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int) + sort order: +++ + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: boolean) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,VALUE._col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + 
limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i, (t_null < 0 AND si_null > 0 AND i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si_null, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i, (t_null < 0 AND si_null > 0 AND i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si_null, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +c0 +18075199789 +PREHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i_null, (t_null < 0 AND si_null > 0 AND i_null < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si_null, i_null) as q +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i_null, (t_null < 0 AND si_null > 0 AND i_null < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si_null, i_null) as q +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vectortab2k_orc_null + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t_null (type: tinyint), si_null (type: smallint), i_null (type: int), ((t_null < 0) and (si_null > 0) and (i_null < 0)) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int) + sort order: +++ + Statistics: Num rows: 
2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: boolean) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,VALUE._col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i_null, (t_null < 0 AND si_null > 0 AND i_null < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si_null, i_null) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i_null, (t_null < 0 AND si_null > 0 AND i_null < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si_null, i_null) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +c0 +0 +PREHOOK: query: -- With some filtering +SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null is null AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc_null + where pmod(i,4) = 3 + order by t_null, si, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +POSTHOOK: query: -- With some filtering +SELECT sum(hash(*)) FROM + (SELECT 
t_null, si, i, (t_null is null AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc_null + where pmod(i,4) = 3 + order by t_null, si, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +c0 +5954311950 +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null < 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc_null + where pmod(i,4) = 2 + order by t_null, si, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null < 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc_null + where pmod(i,4) = 2 + order by t_null, si, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +c0 +-29059205967 +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i, (t_null < 0 AND si_null > 0 AND i < 0) as multi_and_col from vectortab2k_orc_null + where pmod(i,4) != 3 + order by t_null, si_null, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i, (t_null < 0 AND si_null > 0 AND i < 0) as multi_and_col from vectortab2k_orc_null + where pmod(i,4) != 3 + order by t_null, si_null, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +c0 +8046794007 +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i, (t_null < 0 AND si_null > 0 AND i < 0) as multi_and_col from vectortab2k_orc_null + where pmod(i,4) < 3 + order by t_null, si_null, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i, (t_null < 0 AND si_null > 0 AND i < 0) as multi_and_col 
from vectortab2k_orc_null + where pmod(i,4) < 3 + order by t_null, si_null, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +c0 +8046794007 diff --git ql/src/test/results/clientpositive/tez/vector_multi_or_projection.q.out ql/src/test/results/clientpositive/tez/vector_multi_or_projection.q.out new file mode 100644 index 0000000..823d3e7 --- /dev/null +++ ql/src/test/results/clientpositive/tez/vector_multi_or_projection.q.out @@ -0,0 +1,1066 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +create table vectortab2k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@vectortab2k +POSTHOOK: query: -- SORT_QUERY_RESULTS + +create table vectortab2k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vectortab2k +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab2k' OVERWRITE INTO TABLE vectortab2k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@vectortab2k +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab2k' OVERWRITE INTO TABLE vectortab2k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@vectortab2k +PREHOOK: query: CREATE TABLE scratch AS SELECT t, si, i, b, bo FROM vectortab2k +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@vectortab2k +PREHOOK: Output: database:default +PREHOOK: Output: default@scratch +POSTHOOK: query: CREATE 
TABLE scratch AS SELECT t, si, i, b, bo FROM vectortab2k +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@vectortab2k +POSTHOOK: Output: database:default +POSTHOOK: Output: default@scratch +POSTHOOK: Lineage: scratch.b SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: scratch.bo SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: scratch.i SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: scratch.si SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: scratch.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] +t si i b bo +PREHOOK: query: INSERT INTO TABLE scratch VALUES (NULL, NULL, NULL, NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@scratch +POSTHOOK: query: INSERT INTO TABLE scratch VALUES (NULL, NULL, NULL, NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@scratch +POSTHOOK: Lineage: scratch.b EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: scratch.bo EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col5, type:string, comment:), ] +POSTHOOK: Lineage: scratch.i EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: scratch.si EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: scratch.t EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +_col0 _col1 _col2 _col3 _col4 +PREHOOK: query: CREATE TABLE vectortab2k_orc STORED AS ORC AS SELECT * 
FROM scratch +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@scratch +PREHOOK: Output: database:default +PREHOOK: Output: default@vectortab2k_orc +POSTHOOK: query: CREATE TABLE vectortab2k_orc STORED AS ORC AS SELECT * FROM scratch +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@scratch +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vectortab2k_orc +POSTHOOK: Lineage: vectortab2k_orc.b SIMPLE [(scratch)scratch.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc.bo SIMPLE [(scratch)scratch.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc.i SIMPLE [(scratch)scratch.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc.si SIMPLE [(scratch)scratch.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc.t SIMPLE [(scratch)scratch.FieldSchema(name:t, type:tinyint, comment:null), ] +scratch.t scratch.si scratch.i scratch.b scratch.bo +PREHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 OR si > 0 OR i < 0) as multi_or_col from vectortab2k_orc + order by t, si, i) as q +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 OR si > 0 OR i < 0) as multi_or_col from vectortab2k_orc + order by t, si, i) as q +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vectortab2k_orc + Statistics: Num rows: 2001 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t (type: tinyint), si (type: smallint), i (type: int), ((t < 0) or (si > 0) or (i < 0)) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3 + 
Statistics: Num rows: 2001 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int) + sort order: +++ + Statistics: Num rows: 2001 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: boolean) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,VALUE._col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2001 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 OR si > 0 OR i < 0) as multi_or_col from vectortab2k_orc + order by t, si, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 OR si > 0 OR i < 0) as multi_or_col from vectortab2k_orc + order by t, si, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +c0 +22128071730 +PREHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, i, b, (t < 0 OR si > 0 OR i < 0 OR b > 0) as multi_or_col from vectortab2k_orc + order by t, si, i, b) as q +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, i, b, (t < 0 
OR si > 0 OR i < 0 OR b > 0) as multi_or_col from vectortab2k_orc + order by t, si, i, b) as q +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vectortab2k_orc + Statistics: Num rows: 2001 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), ((t < 0) or (si > 0) or (i < 0) or (b > 0)) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2001 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint) + sort order: ++++ + Statistics: Num rows: 2001 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: boolean) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,KEY.reducesinkkey3,VALUE._col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2001 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + 
+PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, i, b, (t < 0 OR si > 0 OR i < 0 OR b > 0) as multi_or_col from vectortab2k_orc + order by t, si, i, b) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, i, b, (t < 0 OR si > 0 OR i < 0 OR b > 0) as multi_or_col from vectortab2k_orc + order by t, si, i, b) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +c0 +-1983100769 +PREHOOK: query: -- Use a boolean column rather than a column comparison expression. +EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, bo, b, (t < 0 OR si > 0 OR bo OR b > 0) as multi_or_col from vectortab2k_orc + order by t, si, bo, b) as q +PREHOOK: type: QUERY +POSTHOOK: query: -- Use a boolean column rather than a column comparison expression. +EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, bo, b, (t < 0 OR si > 0 OR bo OR b > 0) as multi_or_col from vectortab2k_orc + order by t, si, bo, b) as q +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vectortab2k_orc + Statistics: Num rows: 2001 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t (type: tinyint), si (type: smallint), bo (type: boolean), b (type: bigint), ((t < 0) or (si > 0) or bo or (b > 0)) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2001 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: boolean), _col3 (type: bigint) + sort order: ++++ + Statistics: Num rows: 2001 Data 
size: 45620 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: boolean) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,KEY.reducesinkkey3,VALUE._col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2001 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, bo, b, (t < 0 OR si > 0 OR bo OR b > 0) as multi_or_col from vectortab2k_orc + order by t, si, bo, b) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, bo, b, (t < 0 OR si > 0 OR bo OR b > 0) as multi_or_col from vectortab2k_orc + order by t, si, bo, b) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +c0 +30392094034 +PREHOOK: query: -- With some filtering +SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 OR si > 0 OR i < 0) as multi_or_col from vectortab2k_orc + where pmod(i,4) = 2 + order by t, si, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +POSTHOOK: query: -- With some filtering +SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 OR si > 0 OR i < 0) as multi_or_col from vectortab2k_orc + where pmod(i,4) = 2 + order 
by t, si, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +c0 +-29024945908 +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, i, b, (t < 0 OR si > 0 OR i < 0 OR b > 0) as multi_or_col from vectortab2k_orc + where pmod(si,4) < 2 + order by t, si, i, b) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, i, b, (t < 0 OR si > 0 OR i < 0 OR b > 0) as multi_or_col from vectortab2k_orc + where pmod(si,4) < 2 + order by t, si, i, b) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +c0 +47654365207 +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, bo, b, (t < 0 OR si > 0 OR bo OR b > 0) as multi_or_col from vectortab2k_orc + where pmod(i,4) = 2 + order by t, si, bo, b) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, bo, b, (t < 0 OR si > 0 OR bo OR b > 0) as multi_or_col from vectortab2k_orc + where pmod(i,4) = 2 + order by t, si, bo, b) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +c0 +-30154518612 +PREHOOK: query: CREATE TABLE scratch_repeat AS SELECT t, si, i, b, bo, 20 as t_repeat, + 9000 as si_repeat, 9233320 as i_repeat, -823823999339992 as b_repeat, false as bo_repeat_false, true as bo_repeat_true FROM vectortab2k +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@vectortab2k +PREHOOK: Output: database:default +PREHOOK: Output: default@scratch_repeat +POSTHOOK: query: CREATE TABLE scratch_repeat AS SELECT t, si, i, b, bo, 20 as t_repeat, + 9000 as si_repeat, 9233320 as i_repeat, -823823999339992 as b_repeat, false as bo_repeat_false, true as bo_repeat_true FROM vectortab2k +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: 
default@vectortab2k +POSTHOOK: Output: database:default +POSTHOOK: Output: default@scratch_repeat +POSTHOOK: Lineage: scratch_repeat.b SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: scratch_repeat.b_repeat SIMPLE [] +POSTHOOK: Lineage: scratch_repeat.bo SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: scratch_repeat.bo_repeat_false SIMPLE [] +POSTHOOK: Lineage: scratch_repeat.bo_repeat_true SIMPLE [] +POSTHOOK: Lineage: scratch_repeat.i SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: scratch_repeat.i_repeat SIMPLE [] +POSTHOOK: Lineage: scratch_repeat.si SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: scratch_repeat.si_repeat SIMPLE [] +POSTHOOK: Lineage: scratch_repeat.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] +POSTHOOK: Lineage: scratch_repeat.t_repeat SIMPLE [] +t si i b bo t_repeat si_repeat i_repeat b_repeat bo_repeat_false bo_repeat_true +PREHOOK: query: -- The repeated columns ought to create repeated VectorizedRowBatch for those columns. +-- And then when we do a comparison, we should generate a repeated boolean result. +CREATE TABLE vectortab2k_orc_repeat STORED AS ORC AS SELECT * FROM scratch_repeat +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@scratch_repeat +PREHOOK: Output: database:default +PREHOOK: Output: default@vectortab2k_orc_repeat +POSTHOOK: query: -- The repeated columns ought to create repeated VectorizedRowBatch for those columns. +-- And then when we do a comparison, we should generate a repeated boolean result. 
+CREATE TABLE vectortab2k_orc_repeat STORED AS ORC AS SELECT * FROM scratch_repeat +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@scratch_repeat +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vectortab2k_orc_repeat +POSTHOOK: Lineage: vectortab2k_orc_repeat.b SIMPLE [(scratch_repeat)scratch_repeat.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_repeat.b_repeat SIMPLE [(scratch_repeat)scratch_repeat.FieldSchema(name:b_repeat, type:bigint, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_repeat.bo SIMPLE [(scratch_repeat)scratch_repeat.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_repeat.bo_repeat_false SIMPLE [(scratch_repeat)scratch_repeat.FieldSchema(name:bo_repeat_false, type:boolean, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_repeat.bo_repeat_true SIMPLE [(scratch_repeat)scratch_repeat.FieldSchema(name:bo_repeat_true, type:boolean, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_repeat.i SIMPLE [(scratch_repeat)scratch_repeat.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_repeat.i_repeat SIMPLE [(scratch_repeat)scratch_repeat.FieldSchema(name:i_repeat, type:int, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_repeat.si SIMPLE [(scratch_repeat)scratch_repeat.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_repeat.si_repeat SIMPLE [(scratch_repeat)scratch_repeat.FieldSchema(name:si_repeat, type:int, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_repeat.t SIMPLE [(scratch_repeat)scratch_repeat.FieldSchema(name:t, type:tinyint, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_repeat.t_repeat SIMPLE [(scratch_repeat)scratch_repeat.FieldSchema(name:t_repeat, type:int, comment:null), ] +scratch_repeat.t scratch_repeat.si scratch_repeat.i scratch_repeat.b scratch_repeat.bo scratch_repeat.t_repeat scratch_repeat.si_repeat scratch_repeat.i_repeat 
scratch_repeat.b_repeat scratch_repeat.bo_repeat_false scratch_repeat.bo_repeat_true +PREHOOK: query: -- t_repeat < 0 should generate all false. +SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, i, (t_repeat < 0 OR si > 0 OR i < 0) as multi_or_col from vectortab2k_orc_repeat + order by t_repeat, si, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: -- t_repeat < 0 should generate all false. +SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, i, (t_repeat < 0 OR si > 0 OR i < 0) as multi_or_col from vectortab2k_orc_repeat + order by t_repeat, si, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +23302551848 +PREHOOK: query: -- t_repeat > 0 should generate all true. +SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, i, (t_repeat > 0 OR si > 0 OR i < 0) as multi_or_col from vectortab2k_orc_repeat + order by t_repeat, si, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: -- t_repeat > 0 should generate all true. +SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, i, (t_repeat > 0 OR si > 0 OR i < 0) as multi_or_col from vectortab2k_orc_repeat + order by t_repeat, si, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +23302552427 +PREHOOK: query: -- Two repeated false columns at beginning... +SELECT sum(hash(*)) FROM + (SELECT t_repeat, si_repeat, i, (t_repeat < 0 OR si_repeat < 0 OR i < 0) as multi_or_col from vectortab2k_orc_repeat + order by t_repeat, si_repeat, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: -- Two repeated false columns at beginning... 
+SELECT sum(hash(*)) FROM + (SELECT t_repeat, si_repeat, i, (t_repeat < 0 OR si_repeat < 0 OR i < 0) as multi_or_col from vectortab2k_orc_repeat + order by t_repeat, si_repeat, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +15090004243 +PREHOOK: query: -- si_repeat > 0 should generate all true. +SELECT sum(hash(*)) FROM + (SELECT t, si_repeat, i, b, (t < 0 OR si_repeat > 0 OR i < 0 OR b > 0) as multi_or_col from vectortab2k_orc_repeat + order by t, si_repeat, i, b) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: -- si_repeat > 0 should generate all true. +SELECT sum(hash(*)) FROM + (SELECT t, si_repeat, i, b, (t < 0 OR si_repeat > 0 OR i < 0 OR b > 0) as multi_or_col from vectortab2k_orc_repeat + order by t, si_repeat, i, b) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +-3168990878 +PREHOOK: query: -- si_repeat < 0 should generate all false. +SELECT sum(hash(*)) FROM + (SELECT t, si_repeat, i, b, (t < 0 OR si_repeat < 0 OR i < 0 OR b > 0) as multi_or_col from vectortab2k_orc_repeat + order by t, si_repeat, i, b) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: -- si_repeat < 0 should generate all false. +SELECT sum(hash(*)) FROM + (SELECT t, si_repeat, i, b, (t < 0 OR si_repeat < 0 OR i < 0 OR b > 0) as multi_or_col from vectortab2k_orc_repeat + order by t, si_repeat, i, b) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +-3168991048 +PREHOOK: query: -- Use a boolean column rather than a column comparison expression. 
+SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, bo_repeat_false, b, (t_repeat > 0 OR si > 0 OR bo_repeat_false OR b > 0) as multi_or_col from vectortab2k_orc_repeat + order by t_repeat, si, bo_repeat_false, b) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: -- Use a boolean column rather than a column comparison expression. +SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, bo_repeat_false, b, (t_repeat > 0 OR si > 0 OR bo_repeat_false OR b > 0) as multi_or_col from vectortab2k_orc_repeat + order by t_repeat, si, bo_repeat_false, b) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +6670542465 +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, bo_repeat_true, b, (t_repeat > 0 OR si > 0 OR bo_repeat_true OR b > 0) as multi_or_col from vectortab2k_orc_repeat + order by t_repeat, si, bo_repeat_true, b) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, bo_repeat_true, b, (t_repeat > 0 OR si > 0 OR bo_repeat_true OR b > 0) as multi_or_col from vectortab2k_orc_repeat + order by t_repeat, si, bo_repeat_true, b) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +6672464465 +PREHOOK: query: -- With some filtering +SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, i, (t_repeat < 0 OR si > 0 OR i < 0) as multi_or_col from vectortab2k_orc_repeat + where pmod(i,4) > 1 + order by t_repeat, si, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: -- With some filtering +SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, i, (t_repeat < 0 OR si > 0 OR i < 0) as multi_or_col from vectortab2k_orc_repeat + where pmod(i,4) > 1 + order by t_repeat, si, i) 
as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +-22560313971 +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, i, (t_repeat > 0 OR si > 0 OR i < 0) as multi_or_col from vectortab2k_orc_repeat + where pmod(si,4) > 1 + order by t_repeat, si, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, i, (t_repeat > 0 OR si > 0 OR i < 0) as multi_or_col from vectortab2k_orc_repeat + where pmod(si,4) > 1 + order by t_repeat, si, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +18193271981 +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_repeat, si_repeat, i, (t_repeat < 0 OR si_repeat < 0 OR i < 0) as multi_or_col from vectortab2k_orc_repeat + where pmod(i,4) > 1 + order by t_repeat, si_repeat, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_repeat, si_repeat, i, (t_repeat < 0 OR si_repeat < 0 OR i < 0) as multi_or_col from vectortab2k_orc_repeat + where pmod(i,4) > 1 + order by t_repeat, si_repeat, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +-24026515302 +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si_repeat, i, b, (t < 0 OR si_repeat > 0 OR i < 0 OR b > 0) as multi_or_col from vectortab2k_orc_repeat + where pmod(t,4) > 1 + order by t, si_repeat, i, b) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si_repeat, i, b, (t < 0 OR si_repeat > 0 OR i < 0 OR b > 0) as multi_or_col from vectortab2k_orc_repeat + where pmod(t,4) > 1 + order by t, si_repeat, i, b) as q 
+POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +-16534191176 +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si_repeat, i, b, (t < 0 OR si_repeat < 0 OR i < 0 OR b > 0) as multi_or_col from vectortab2k_orc_repeat + where pmod(b,4) > 1 + order by t, si_repeat, i, b) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si_repeat, i, b, (t < 0 OR si_repeat < 0 OR i < 0 OR b > 0) as multi_or_col from vectortab2k_orc_repeat + where pmod(b,4) > 1 + order by t, si_repeat, i, b) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +-23243914195 +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, bo_repeat_false, b, (t_repeat > 0 OR si > 0 OR bo_repeat_false OR b > 0) as multi_or_col from vectortab2k_orc_repeat + where pmod(si,4) > 1 + order by t_repeat, si, bo_repeat_false, b) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, bo_repeat_false, b, (t_repeat > 0 OR si > 0 OR bo_repeat_false OR b > 0) as multi_or_col from vectortab2k_orc_repeat + where pmod(si,4) > 1 + order by t_repeat, si, bo_repeat_false, b) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +15269030422 +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, bo_repeat_true, b, (t_repeat > 0 OR si > 0 OR bo_repeat_true OR b > 0) as multi_or_col from vectortab2k_orc_repeat + where pmod(si,4) < 2 + order by t_repeat, si, bo_repeat_true, b) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, bo_repeat_true, b, (t_repeat > 
0 OR si > 0 OR bo_repeat_true OR b > 0) as multi_or_col from vectortab2k_orc_repeat + where pmod(si,4) < 2 + order by t_repeat, si, bo_repeat_true, b) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +-17101357469 +PREHOOK: query: CREATE TABLE scratch_null AS SELECT t, si, i, b, bo, + cast(null as tinyint) as t_null, cast(null as smallint) as si_null, cast(null as int) as i_null, cast(null as bigint) as b_null, cast(null as boolean) as bo_null FROM vectortab2k +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@vectortab2k +PREHOOK: Output: database:default +PREHOOK: Output: default@scratch_null +POSTHOOK: query: CREATE TABLE scratch_null AS SELECT t, si, i, b, bo, + cast(null as tinyint) as t_null, cast(null as smallint) as si_null, cast(null as int) as i_null, cast(null as bigint) as b_null, cast(null as boolean) as bo_null FROM vectortab2k +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@vectortab2k +POSTHOOK: Output: database:default +POSTHOOK: Output: default@scratch_null +POSTHOOK: Lineage: scratch_null.b SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: scratch_null.b_null EXPRESSION [] +POSTHOOK: Lineage: scratch_null.bo SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: scratch_null.bo_null EXPRESSION [] +POSTHOOK: Lineage: scratch_null.i SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: scratch_null.i_null EXPRESSION [] +POSTHOOK: Lineage: scratch_null.si SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: scratch_null.si_null EXPRESSION [] +POSTHOOK: Lineage: scratch_null.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] +POSTHOOK: Lineage: scratch_null.t_null EXPRESSION [] +t si i b bo t_null si_null i_null b_null 
bo_null +PREHOOK: query: -- The nulled columns ought to create repeated null VectorizedRowBatch for those columns. +CREATE TABLE vectortab2k_orc_null STORED AS ORC AS SELECT * FROM scratch_null +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@scratch_null +PREHOOK: Output: database:default +PREHOOK: Output: default@vectortab2k_orc_null +POSTHOOK: query: -- The nulled columns ought to create repeated null VectorizedRowBatch for those columns. +CREATE TABLE vectortab2k_orc_null STORED AS ORC AS SELECT * FROM scratch_null +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@scratch_null +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vectortab2k_orc_null +POSTHOOK: Lineage: vectortab2k_orc_null.b SIMPLE [(scratch_null)scratch_null.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_null.b_null SIMPLE [(scratch_null)scratch_null.FieldSchema(name:b_null, type:bigint, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_null.bo SIMPLE [(scratch_null)scratch_null.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_null.bo_null SIMPLE [(scratch_null)scratch_null.FieldSchema(name:bo_null, type:boolean, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_null.i SIMPLE [(scratch_null)scratch_null.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_null.i_null SIMPLE [(scratch_null)scratch_null.FieldSchema(name:i_null, type:int, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_null.si SIMPLE [(scratch_null)scratch_null.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_null.si_null SIMPLE [(scratch_null)scratch_null.FieldSchema(name:si_null, type:smallint, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_null.t SIMPLE [(scratch_null)scratch_null.FieldSchema(name:t, type:tinyint, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_null.t_null SIMPLE 
[(scratch_null)scratch_null.FieldSchema(name:t_null, type:tinyint, comment:null), ] +scratch_null.t scratch_null.si scratch_null.i scratch_null.b scratch_null.bo scratch_null.t_null scratch_null.si_null scratch_null.i_null scratch_null.b_null scratch_null.bo_null +PREHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null is null OR si > 0 OR i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si, i) as q +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null is null OR si > 0 OR i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si, i) as q +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vectortab2k_orc_null + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t_null (type: tinyint), si (type: smallint), i (type: int), (t_null is null or (si > 0) or (i < 0)) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int) + sort order: +++ + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: boolean) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,VALUE._col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: complete + outputColumnNames: 
_col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null is null OR si > 0 OR i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null is null OR si > 0 OR i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +c0 +26405879723 +PREHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null < 0 OR si > 0 OR i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si, i) as q +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null < 0 OR si > 0 OR i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si, i) as q +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vectortab2k_orc_null + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t_null (type: tinyint), si (type: smallint), i (type: 
int), ((t_null < 0) or (si > 0) or (i < 0)) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int) + sort order: +++ + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: boolean) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,VALUE._col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null < 0 OR si > 0 OR i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null < 0 OR si > 0 OR i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +c0 +26405879144 +PREHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i, (t_null < 0 OR si_null > 0 OR i < 0) 
as multi_and_col from vectortab2k_orc_null + order by t_null, si_null, i) as q +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i, (t_null < 0 OR si_null > 0 OR i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si_null, i) as q +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vectortab2k_orc_null + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t_null (type: tinyint), si_null (type: smallint), i (type: int), ((t_null < 0) or (si_null > 0) or (i < 0)) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int) + sort order: +++ + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: boolean) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,VALUE._col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i, (t_null < 0 OR si_null > 0 OR i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si_null, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i, (t_null < 0 OR si_null > 0 OR i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si_null, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +c0 +18075200723 +PREHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i_null, (t_null < 0 OR si_null > 0 OR i_null < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si_null, i_null) as q +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i_null, (t_null < 0 OR si_null > 0 OR i_null < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si_null, i_null) as q +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vectortab2k_orc_null + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t_null (type: tinyint), si_null (type: smallint), i_null (type: int), ((t_null < 0) or (si_null > 0) or (i_null < 0)) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + 
Reduce Output Operator + key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int) + sort order: +++ + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: boolean) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,VALUE._col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i_null, (t_null < 0 OR si_null > 0 OR i_null < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si_null, i_null) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i_null, (t_null < 0 OR si_null > 0 OR i_null < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si_null, i_null) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +c0 +0 +PREHOOK: query: -- With some filtering +SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null is null OR si > 0 OR i < 0) as multi_and_col from vectortab2k_orc_null + where pmod(i,4) = 2 + order by t_null, si, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: 
default@vectortab2k_orc_null +#### A masked pattern was here #### +POSTHOOK: query: -- With some filtering +SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null is null OR si > 0 OR i < 0) as multi_and_col from vectortab2k_orc_null + where pmod(i,4) = 2 + order by t_null, si, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +c0 +-29059205491 +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null < 0 OR si > 0 OR i < 0) as multi_and_col from vectortab2k_orc_null + where pmod(i,4) = 3 + order by t_null, si, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null < 0 OR si > 0 OR i < 0) as multi_and_col from vectortab2k_orc_null + where pmod(i,4) = 3 + order by t_null, si, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +c0 +5954312170 +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i, (t_null < 0 OR si_null > 0 OR i < 0) as multi_and_col from vectortab2k_orc_null + where pmod(i,4) = 0 + order by t_null, si_null, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i, (t_null < 0 OR si_null > 0 OR i < 0) as multi_and_col from vectortab2k_orc_null + where pmod(i,4) = 0 + order by t_null, si_null, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +c0 +-19001666507 +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i_null, (t_null < 0 OR si_null > 0 OR i_null < 0) as multi_and_col from vectortab2k_orc_null + where pmod(i,4) != 2 + order by t_null, si_null, i_null) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_null +#### A masked 
pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i_null, (t_null < 0 OR si_null > 0 OR i_null < 0) as multi_and_col from vectortab2k_orc_null + where pmod(i,4) != 2 + order by t_null, si_null, i_null) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +c0 +0 diff --git ql/src/test/results/clientpositive/vector_multi_and_projection.q.out ql/src/test/results/clientpositive/vector_multi_and_projection.q.out new file mode 100644 index 0000000..84e3001 --- /dev/null +++ ql/src/test/results/clientpositive/vector_multi_and_projection.q.out @@ -0,0 +1,1017 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +create table vectortab2k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@vectortab2k +POSTHOOK: query: -- SORT_QUERY_RESULTS + +create table vectortab2k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vectortab2k +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab2k' OVERWRITE INTO TABLE vectortab2k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@vectortab2k +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab2k' OVERWRITE INTO TABLE vectortab2k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@vectortab2k +PREHOOK: query: CREATE TABLE scratch AS SELECT t, si, i, b, bo FROM vectortab2k +PREHOOK: type: 
CREATETABLE_AS_SELECT +PREHOOK: Input: default@vectortab2k +PREHOOK: Output: database:default +PREHOOK: Output: default@scratch +POSTHOOK: query: CREATE TABLE scratch AS SELECT t, si, i, b, bo FROM vectortab2k +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@vectortab2k +POSTHOOK: Output: database:default +POSTHOOK: Output: default@scratch +POSTHOOK: Lineage: scratch.b SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: scratch.bo SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: scratch.i SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: scratch.si SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: scratch.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] +t si i b bo +PREHOOK: query: INSERT INTO TABLE scratch VALUES (NULL, NULL, NULL, NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@scratch +POSTHOOK: query: INSERT INTO TABLE scratch VALUES (NULL, NULL, NULL, NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@scratch +POSTHOOK: Lineage: scratch.b EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: scratch.bo EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col5, type:string, comment:), ] +POSTHOOK: Lineage: scratch.i EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: scratch.si EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: scratch.t EXPRESSION 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +_col0 _col1 _col2 _col3 _col4 +PREHOOK: query: CREATE TABLE vectortab2k_orc STORED AS ORC AS SELECT * FROM scratch +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@scratch +PREHOOK: Output: database:default +PREHOOK: Output: default@vectortab2k_orc +POSTHOOK: query: CREATE TABLE vectortab2k_orc STORED AS ORC AS SELECT * FROM scratch +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@scratch +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vectortab2k_orc +POSTHOOK: Lineage: vectortab2k_orc.b SIMPLE [(scratch)scratch.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc.bo SIMPLE [(scratch)scratch.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc.i SIMPLE [(scratch)scratch.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc.si SIMPLE [(scratch)scratch.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc.t SIMPLE [(scratch)scratch.FieldSchema(name:t, type:tinyint, comment:null), ] +scratch.t scratch.si scratch.i scratch.b scratch.bo +PREHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc + order by t, si, i) as q +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc + order by t, si, i) as q +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vectortab2k_orc + Statistics: Num rows: 2001 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t (type: tinyint), si (type: smallint), i (type: int), ((t < 0) and (si > 0) 
and (i < 0)) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2001 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int) + sort order: +++ + Statistics: Num rows: 2001 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: boolean) + Reduce Operator Tree: + Select Operator + expressions: hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,VALUE._col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2001 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc + order by t, si, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc + order by t, si, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +c0 +22128070258 +PREHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, i, b, (t < 0 AND si > 0 AND i < 0 AND b > 0) as multi_and_col from vectortab2k_orc + order by t, si, i, b) as q +PREHOOK: type: QUERY 
+POSTHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, i, b, (t < 0 AND si > 0 AND i < 0 AND b > 0) as multi_and_col from vectortab2k_orc + order by t, si, i, b) as q +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vectortab2k_orc + Statistics: Num rows: 2001 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), ((t < 0) and (si > 0) and (i < 0) and (b > 0)) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2001 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint) + sort order: ++++ + Statistics: Num rows: 2001 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: boolean) + Reduce Operator Tree: + Select Operator + expressions: hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,KEY.reducesinkkey3,VALUE._col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2001 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, i, 
b, (t < 0 AND si > 0 AND i < 0 AND b > 0) as multi_and_col from vectortab2k_orc + order by t, si, i, b) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, i, b, (t < 0 AND si > 0 AND i < 0 AND b > 0) as multi_and_col from vectortab2k_orc + order by t, si, i, b) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +c0 +-1983102543 +PREHOOK: query: -- Use a boolean column rather than a column comparison expression. +EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, bo, b, (t < 0 AND si > 0 AND bo AND b > 0) as multi_and_col from vectortab2k_orc + order by t, si, bo, b) as q +PREHOOK: type: QUERY +POSTHOOK: query: -- Use a boolean column rather than a column comparison expression. +EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, bo, b, (t < 0 AND si > 0 AND bo AND b > 0) as multi_and_col from vectortab2k_orc + order by t, si, bo, b) as q +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vectortab2k_orc + Statistics: Num rows: 2001 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t (type: tinyint), si (type: smallint), bo (type: boolean), b (type: bigint), ((t < 0) and (si > 0) and bo and (b > 0)) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2001 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: boolean), _col3 (type: bigint) + sort order: ++++ + Statistics: Num rows: 2001 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: boolean) + Reduce Operator Tree: + Select Operator + expressions: 
hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,KEY.reducesinkkey3,VALUE._col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2001 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, bo, b, (t < 0 AND si > 0 AND bo AND b > 0) as multi_and_col from vectortab2k_orc + order by t, si, bo, b) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, bo, b, (t < 0 AND si > 0 AND bo AND b > 0) as multi_and_col from vectortab2k_orc + order by t, si, bo, b) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +c0 +30392092250 +PREHOOK: query: -- With some filtering +SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc + where pmod(t, 4) > 1 + order by t, si, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +POSTHOOK: query: -- With some filtering +SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc + where pmod(t, 4) > 1 + order by t, si, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +c0 +4766599363 +PREHOOK: 
query: SELECT sum(hash(*)) FROM + (SELECT t, si, i, b, (t < 0 AND si > 0 AND i < 0 AND b > 0) as multi_and_col from vectortab2k_orc + where pmod(t, 4) < 2 + order by t, si, i, b) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, i, b, (t < 0 AND si > 0 AND i < 0 AND b > 0) as multi_and_col from vectortab2k_orc + where pmod(t, 4) < 2 + order by t, si, i, b) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +c0 +29923525611 +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, bo, b, (t < 0 AND si > 0 AND bo AND b > 0) as multi_and_col from vectortab2k_orc + where pmod(b, 8) == 7 + order by t, si, bo, b) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, bo, b, (t < 0 AND si > 0 AND bo AND b > 0) as multi_and_col from vectortab2k_orc + where pmod(b, 8) == 7 + order by t, si, bo, b) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +c0 +1048488580 +PREHOOK: query: CREATE TABLE scratch_repeat AS SELECT t, si, i, b, bo, 20 as t_repeat, + 9000 as si_repeat, 9233320 as i_repeat, -823823999339992 as b_repeat, false as bo_repeat_false, true as bo_repeat_true FROM vectortab2k +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@vectortab2k +PREHOOK: Output: database:default +PREHOOK: Output: default@scratch_repeat +POSTHOOK: query: CREATE TABLE scratch_repeat AS SELECT t, si, i, b, bo, 20 as t_repeat, + 9000 as si_repeat, 9233320 as i_repeat, -823823999339992 as b_repeat, false as bo_repeat_false, true as bo_repeat_true FROM vectortab2k +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@vectortab2k +POSTHOOK: Output: database:default +POSTHOOK: Output: default@scratch_repeat +POSTHOOK: Lineage: 
scratch_repeat.b SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: scratch_repeat.b_repeat SIMPLE [] +POSTHOOK: Lineage: scratch_repeat.bo SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: scratch_repeat.bo_repeat_false SIMPLE [] +POSTHOOK: Lineage: scratch_repeat.bo_repeat_true SIMPLE [] +POSTHOOK: Lineage: scratch_repeat.i SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: scratch_repeat.i_repeat SIMPLE [] +POSTHOOK: Lineage: scratch_repeat.si SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: scratch_repeat.si_repeat SIMPLE [] +POSTHOOK: Lineage: scratch_repeat.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] +POSTHOOK: Lineage: scratch_repeat.t_repeat SIMPLE [] +t si i b bo t_repeat si_repeat i_repeat b_repeat bo_repeat_false bo_repeat_true +PREHOOK: query: -- The repeated columns ought to create repeated VectorizedRowBatch for those columns. +-- And then when we do a comparison, we should generate a repeated boolean result. +CREATE TABLE vectortab2k_orc_repeat STORED AS ORC AS SELECT * FROM scratch_repeat +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@scratch_repeat +PREHOOK: Output: database:default +PREHOOK: Output: default@vectortab2k_orc_repeat +POSTHOOK: query: -- The repeated columns ought to create repeated VectorizedRowBatch for those columns. +-- And then when we do a comparison, we should generate a repeated boolean result. 
+CREATE TABLE vectortab2k_orc_repeat STORED AS ORC AS SELECT * FROM scratch_repeat +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@scratch_repeat +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vectortab2k_orc_repeat +POSTHOOK: Lineage: vectortab2k_orc_repeat.b SIMPLE [(scratch_repeat)scratch_repeat.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_repeat.b_repeat SIMPLE [(scratch_repeat)scratch_repeat.FieldSchema(name:b_repeat, type:bigint, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_repeat.bo SIMPLE [(scratch_repeat)scratch_repeat.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_repeat.bo_repeat_false SIMPLE [(scratch_repeat)scratch_repeat.FieldSchema(name:bo_repeat_false, type:boolean, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_repeat.bo_repeat_true SIMPLE [(scratch_repeat)scratch_repeat.FieldSchema(name:bo_repeat_true, type:boolean, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_repeat.i SIMPLE [(scratch_repeat)scratch_repeat.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_repeat.i_repeat SIMPLE [(scratch_repeat)scratch_repeat.FieldSchema(name:i_repeat, type:int, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_repeat.si SIMPLE [(scratch_repeat)scratch_repeat.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_repeat.si_repeat SIMPLE [(scratch_repeat)scratch_repeat.FieldSchema(name:si_repeat, type:int, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_repeat.t SIMPLE [(scratch_repeat)scratch_repeat.FieldSchema(name:t, type:tinyint, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_repeat.t_repeat SIMPLE [(scratch_repeat)scratch_repeat.FieldSchema(name:t_repeat, type:int, comment:null), ] +scratch_repeat.t scratch_repeat.si scratch_repeat.i scratch_repeat.b scratch_repeat.bo scratch_repeat.t_repeat scratch_repeat.si_repeat scratch_repeat.i_repeat 
scratch_repeat.b_repeat scratch_repeat.bo_repeat_false scratch_repeat.bo_repeat_true +PREHOOK: query: -- t_repeat > 0 should generate all true. +SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, i, (t_repeat > 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc_repeat + order by t_repeat, si, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: -- t_repeat > 0 should generate all true. +SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, i, (t_repeat > 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc_repeat + order by t_repeat, si, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +23302550859 +PREHOOK: query: -- t_repeat < 0 should generate all false. +SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, i, (t_repeat < 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc_repeat + order by t_repeat, si, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: -- t_repeat < 0 should generate all false. +SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, i, (t_repeat < 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc_repeat + order by t_repeat, si, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +23302550427 +PREHOOK: query: -- Two repeated false columns at beginning... +SELECT sum(hash(*)) FROM + (SELECT t_repeat, si_repeat, i, (t_repeat > 0 AND si_repeat > 0 AND i < 0) as multi_and_col from vectortab2k_orc_repeat + order by t_repeat, si_repeat, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: -- Two repeated false columns at beginning... 
+SELECT sum(hash(*)) FROM + (SELECT t_repeat, si_repeat, i, (t_repeat > 0 AND si_repeat > 0 AND i < 0) as multi_and_col from vectortab2k_orc_repeat + order by t_repeat, si_repeat, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +15090004243 +PREHOOK: query: -- si_repeat > 0 should generate all true. +SELECT sum(hash(*)) FROM + (SELECT t, si_repeat, i, b_repeat, (t < 0 AND si_repeat > 0 AND i < 0 AND b_repeat > 0) as multi_and_col from vectortab2k_orc_repeat + order by t, si_repeat, i, b_repeat) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: -- si_repeat > 0 should generate all true. +SELECT sum(hash(*)) FROM + (SELECT t, si_repeat, i, b_repeat, (t < 0 AND si_repeat > 0 AND i < 0 AND b_repeat > 0) as multi_and_col from vectortab2k_orc_repeat + order by t, si_repeat, i, b_repeat) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +-4908324365 +PREHOOK: query: -- si_repeat < 0 should generate all false. +SELECT sum(hash(*)) FROM + (SELECT t, si_repeat, i, b_repeat, (t < 0 AND si_repeat < 0 AND i < 0 AND b_repeat > 0) as multi_and_col from vectortab2k_orc_repeat + order by t, si_repeat, i, b_repeat) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: -- si_repeat < 0 should generate all false. +SELECT sum(hash(*)) FROM + (SELECT t, si_repeat, i, b_repeat, (t < 0 AND si_repeat < 0 AND i < 0 AND b_repeat > 0) as multi_and_col from vectortab2k_orc_repeat + order by t, si_repeat, i, b_repeat) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +-4908324365 +PREHOOK: query: -- Use a boolean column rather than a column comparison expression. 
+SELECT sum(hash(*)) FROM + (SELECT t, si, bo_repeat_false, b, (t < 0 AND si > 0 AND bo_repeat_false AND b > 0) as multi_and_col from vectortab2k_orc_repeat + order by t, si, bo_repeat_false, b) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: -- Use a boolean column rather than a column comparison expression. +SELECT sum(hash(*)) FROM + (SELECT t, si, bo_repeat_false, b, (t < 0 AND si > 0 AND bo_repeat_false AND b > 0) as multi_and_col from vectortab2k_orc_repeat + order by t, si, bo_repeat_false, b) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +30391190705 +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, bo_repeat_true, b, (t < 0 AND si > 0 AND bo_repeat_true AND b > 0) as multi_and_col from vectortab2k_orc_repeat + order by t, si, bo_repeat_true, b) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, bo_repeat_true, b, (t < 0 AND si > 0 AND bo_repeat_true AND b > 0) as multi_and_col from vectortab2k_orc_repeat + order by t, si, bo_repeat_true, b) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +30393113000 +PREHOOK: query: -- With some filtering +SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, i, (t_repeat > 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc_repeat + where pmod(si, 4) = 0 + order by t_repeat, si, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: -- With some filtering +SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, i, (t_repeat > 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc_repeat + where pmod(si, 4) = 0 + order by t_repeat, si, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +43680228 +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, i, (t_repeat < 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc_repeat + where pmod(si, 4) = 3 + order by t_repeat, si, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, i, (t_repeat < 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc_repeat + where pmod(si, 4) = 3 + order by t_repeat, si, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +7322792767 +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_repeat, si_repeat, i, (t_repeat > 0 AND si_repeat > 0 AND i < 0) as multi_and_col from vectortab2k_orc_repeat + where pmod(si, 4) != 3 + order by t_repeat, si_repeat, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_repeat, si_repeat, i, (t_repeat > 0 AND si_repeat > 0 AND i < 0) as multi_and_col from vectortab2k_orc_repeat + where pmod(si, 4) != 3 + order by t_repeat, si_repeat, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +2570758619 +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si_repeat, i, b_repeat, (t < 0 AND si_repeat > 0 AND i < 0 AND b_repeat > 0) as multi_and_col from vectortab2k_orc_repeat + where pmod(si, 4) < 2 + order by t, si_repeat, i, b_repeat) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si_repeat, i, b_repeat, (t < 0 AND si_repeat > 0 AND i < 0 AND b_repeat > 0) as multi_and_col from vectortab2k_orc_repeat + where pmod(si, 4) < 2 + order by t, si_repeat, 
i, b_repeat) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +37376898668 +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si_repeat, i, b_repeat, (t < 0 AND si_repeat < 0 AND i < 0 AND b_repeat > 0) as multi_and_col from vectortab2k_orc_repeat + where pmod(t, 4) = 0 + order by t, si_repeat, i, b_repeat) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si_repeat, i, b_repeat, (t < 0 AND si_repeat < 0 AND i < 0 AND b_repeat > 0) as multi_and_col from vectortab2k_orc_repeat + where pmod(t, 4) = 0 + order by t, si_repeat, i, b_repeat) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +16927876919 +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, bo_repeat_false, b, (t < 0 AND si > 0 AND bo_repeat_false AND b > 0) as multi_and_col from vectortab2k_orc_repeat + where pmod(b, 4) > 1 + order by t, si, bo_repeat_false, b) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, bo_repeat_false, b, (t < 0 AND si > 0 AND bo_repeat_false AND b > 0) as multi_and_col from vectortab2k_orc_repeat + where pmod(b, 4) > 1 + order by t, si, bo_repeat_false, b) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +-12904162858 +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, bo_repeat_true, b, (t < 0 AND si > 0 AND bo_repeat_true AND b > 0) as multi_and_col from vectortab2k_orc_repeat + where pmod(si, 4) < 3 + order by t, si, bo_repeat_true, b) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, bo_repeat_true, b, 
(t < 0 AND si > 0 AND bo_repeat_true AND b > 0) as multi_and_col from vectortab2k_orc_repeat + where pmod(si, 4) < 3 + order by t, si, bo_repeat_true, b) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +12129190878 +PREHOOK: query: CREATE TABLE scratch_null AS SELECT t, si, i, b, bo, + cast(null as tinyint) as t_null, cast(null as smallint) as si_null, cast(null as int) as i_null, cast(null as bigint) as b_null, cast(null as boolean) as bo_null FROM vectortab2k +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@vectortab2k +PREHOOK: Output: database:default +PREHOOK: Output: default@scratch_null +POSTHOOK: query: CREATE TABLE scratch_null AS SELECT t, si, i, b, bo, + cast(null as tinyint) as t_null, cast(null as smallint) as si_null, cast(null as int) as i_null, cast(null as bigint) as b_null, cast(null as boolean) as bo_null FROM vectortab2k +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@vectortab2k +POSTHOOK: Output: database:default +POSTHOOK: Output: default@scratch_null +POSTHOOK: Lineage: scratch_null.b SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: scratch_null.b_null EXPRESSION [] +POSTHOOK: Lineage: scratch_null.bo SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: scratch_null.bo_null EXPRESSION [] +POSTHOOK: Lineage: scratch_null.i SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: scratch_null.i_null EXPRESSION [] +POSTHOOK: Lineage: scratch_null.si SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: scratch_null.si_null EXPRESSION [] +POSTHOOK: Lineage: scratch_null.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] +POSTHOOK: Lineage: scratch_null.t_null EXPRESSION [] +t si i b bo t_null si_null i_null b_null 
bo_null +PREHOOK: query: -- The nulled columns ought to create repeated null VectorizedRowBatch for those columns. +CREATE TABLE vectortab2k_orc_null STORED AS ORC AS SELECT * FROM scratch_null +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@scratch_null +PREHOOK: Output: database:default +PREHOOK: Output: default@vectortab2k_orc_null +POSTHOOK: query: -- The nulled columns ought to create repeated null VectorizedRowBatch for those columns. +CREATE TABLE vectortab2k_orc_null STORED AS ORC AS SELECT * FROM scratch_null +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@scratch_null +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vectortab2k_orc_null +POSTHOOK: Lineage: vectortab2k_orc_null.b SIMPLE [(scratch_null)scratch_null.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_null.b_null SIMPLE [(scratch_null)scratch_null.FieldSchema(name:b_null, type:bigint, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_null.bo SIMPLE [(scratch_null)scratch_null.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_null.bo_null SIMPLE [(scratch_null)scratch_null.FieldSchema(name:bo_null, type:boolean, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_null.i SIMPLE [(scratch_null)scratch_null.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_null.i_null SIMPLE [(scratch_null)scratch_null.FieldSchema(name:i_null, type:int, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_null.si SIMPLE [(scratch_null)scratch_null.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_null.si_null SIMPLE [(scratch_null)scratch_null.FieldSchema(name:si_null, type:smallint, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_null.t SIMPLE [(scratch_null)scratch_null.FieldSchema(name:t, type:tinyint, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_null.t_null SIMPLE 
[(scratch_null)scratch_null.FieldSchema(name:t_null, type:tinyint, comment:null), ] +scratch_null.t scratch_null.si scratch_null.i scratch_null.b scratch_null.bo scratch_null.t_null scratch_null.si_null scratch_null.i_null scratch_null.b_null scratch_null.bo_null +PREHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null is null AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si, i) as q +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null is null AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si, i) as q +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vectortab2k_orc_null + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t_null (type: tinyint), si (type: smallint), i (type: int), (t_null is null and (si > 0) and (i < 0)) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int) + sort order: +++ + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: boolean) + Reduce Operator Tree: + Select Operator + expressions: hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,VALUE._col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + 
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null is null AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null is null AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +c0 +26405878155 +PREHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null < 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si, i) as q +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null < 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si, i) as q +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vectortab2k_orc_null + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t_null (type: tinyint), si (type: smallint), i (type: int), ((t_null < 0) and (si > 0) and (i < 0)) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: 
tinyint), _col1 (type: smallint), _col2 (type: int) + sort order: +++ + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: boolean) + Reduce Operator Tree: + Select Operator + expressions: hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,VALUE._col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null < 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null < 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +c0 +26405877723 +PREHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i, (t_null < 0 AND si_null > 0 AND i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si_null, i) as q +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i, (t_null < 0 AND si_null > 0 AND i < 0) as multi_and_col from 
vectortab2k_orc_null + order by t_null, si_null, i) as q +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vectortab2k_orc_null + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t_null (type: tinyint), si_null (type: smallint), i (type: int), ((t_null < 0) and (si_null > 0) and (i < 0)) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int) + sort order: +++ + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: boolean) + Reduce Operator Tree: + Select Operator + expressions: hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,VALUE._col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i, (t_null < 0 AND si_null > 0 AND i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si_null, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: 
default@vectortab2k_orc_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i, (t_null < 0 AND si_null > 0 AND i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si_null, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +c0 +18075199789 +PREHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i_null, (t_null < 0 AND si_null > 0 AND i_null < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si_null, i_null) as q +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i_null, (t_null < 0 AND si_null > 0 AND i_null < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si_null, i_null) as q +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vectortab2k_orc_null + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t_null (type: tinyint), si_null (type: smallint), i_null (type: int), ((t_null < 0) and (si_null > 0) and (i_null < 0)) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int) + sort order: +++ + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: boolean) + Reduce Operator Tree: + Select Operator + expressions: hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,VALUE._col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: 
sum(_col0) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i_null, (t_null < 0 AND si_null > 0 AND i_null < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si_null, i_null) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i_null, (t_null < 0 AND si_null > 0 AND i_null < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si_null, i_null) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +c0 +0 +PREHOOK: query: -- With some filtering +SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null is null AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc_null + where pmod(i,4) = 3 + order by t_null, si, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +POSTHOOK: query: -- With some filtering +SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null is null AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc_null + where pmod(i,4) = 3 + order by t_null, si, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +c0 +5954311950 +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null < 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc_null 
+ where pmod(i,4) = 2 + order by t_null, si, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null < 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc_null + where pmod(i,4) = 2 + order by t_null, si, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +c0 +-29059205967 +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i, (t_null < 0 AND si_null > 0 AND i < 0) as multi_and_col from vectortab2k_orc_null + where pmod(i,4) != 3 + order by t_null, si_null, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i, (t_null < 0 AND si_null > 0 AND i < 0) as multi_and_col from vectortab2k_orc_null + where pmod(i,4) != 3 + order by t_null, si_null, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +c0 +8046794007 +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i, (t_null < 0 AND si_null > 0 AND i < 0) as multi_and_col from vectortab2k_orc_null + where pmod(i,4) < 3 + order by t_null, si_null, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i, (t_null < 0 AND si_null > 0 AND i < 0) as multi_and_col from vectortab2k_orc_null + where pmod(i,4) < 3 + order by t_null, si_null, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +c0 +8046794007 diff --git ql/src/test/results/clientpositive/vector_multi_or_projection.q.out ql/src/test/results/clientpositive/vector_multi_or_projection.q.out new file mode 100644 index 0000000..a8f5eb8 --- /dev/null +++ 
ql/src/test/results/clientpositive/vector_multi_or_projection.q.out @@ -0,0 +1,1017 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +create table vectortab2k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@vectortab2k +POSTHOOK: query: -- SORT_QUERY_RESULTS + +create table vectortab2k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vectortab2k +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab2k' OVERWRITE INTO TABLE vectortab2k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@vectortab2k +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab2k' OVERWRITE INTO TABLE vectortab2k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@vectortab2k +PREHOOK: query: CREATE TABLE scratch AS SELECT t, si, i, b, bo FROM vectortab2k +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@vectortab2k +PREHOOK: Output: database:default +PREHOOK: Output: default@scratch +POSTHOOK: query: CREATE TABLE scratch AS SELECT t, si, i, b, bo FROM vectortab2k +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@vectortab2k +POSTHOOK: Output: database:default +POSTHOOK: Output: default@scratch +POSTHOOK: Lineage: scratch.b SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: scratch.bo SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:bo, type:boolean, comment:null), 
] +POSTHOOK: Lineage: scratch.i SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: scratch.si SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: scratch.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] +t si i b bo +PREHOOK: query: INSERT INTO TABLE scratch VALUES (NULL, NULL, NULL, NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@scratch +POSTHOOK: query: INSERT INTO TABLE scratch VALUES (NULL, NULL, NULL, NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@scratch +POSTHOOK: Lineage: scratch.b EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: scratch.bo EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col5, type:string, comment:), ] +POSTHOOK: Lineage: scratch.i EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: scratch.si EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: scratch.t EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +_col0 _col1 _col2 _col3 _col4 +PREHOOK: query: CREATE TABLE vectortab2k_orc STORED AS ORC AS SELECT * FROM scratch +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@scratch +PREHOOK: Output: database:default +PREHOOK: Output: default@vectortab2k_orc +POSTHOOK: query: CREATE TABLE vectortab2k_orc STORED AS ORC AS SELECT * FROM scratch +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@scratch +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vectortab2k_orc +POSTHOOK: Lineage: 
vectortab2k_orc.b SIMPLE [(scratch)scratch.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc.bo SIMPLE [(scratch)scratch.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc.i SIMPLE [(scratch)scratch.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc.si SIMPLE [(scratch)scratch.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc.t SIMPLE [(scratch)scratch.FieldSchema(name:t, type:tinyint, comment:null), ] +scratch.t scratch.si scratch.i scratch.b scratch.bo +PREHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 OR si > 0 OR i < 0) as multi_or_col from vectortab2k_orc + order by t, si, i) as q +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 OR si > 0 OR i < 0) as multi_or_col from vectortab2k_orc + order by t, si, i) as q +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vectortab2k_orc + Statistics: Num rows: 2001 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t (type: tinyint), si (type: smallint), i (type: int), ((t < 0) or (si > 0) or (i < 0)) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2001 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int) + sort order: +++ + Statistics: Num rows: 2001 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: boolean) + Reduce Operator Tree: + Select Operator + expressions: hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,VALUE._col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2001 Data size: 
45620 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 OR si > 0 OR i < 0) as multi_or_col from vectortab2k_orc + order by t, si, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 OR si > 0 OR i < 0) as multi_or_col from vectortab2k_orc + order by t, si, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +c0 +22128071730 +PREHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, i, b, (t < 0 OR si > 0 OR i < 0 OR b > 0) as multi_or_col from vectortab2k_orc + order by t, si, i, b) as q +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, i, b, (t < 0 OR si > 0 OR i < 0 OR b > 0) as multi_or_col from vectortab2k_orc + order by t, si, i, b) as q +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vectortab2k_orc + Statistics: Num rows: 2001 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), ((t < 0) or (si > 0) or (i < 0) or (b > 0)) (type: 
boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2001 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint) + sort order: ++++ + Statistics: Num rows: 2001 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: boolean) + Reduce Operator Tree: + Select Operator + expressions: hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,KEY.reducesinkkey3,VALUE._col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2001 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, i, b, (t < 0 OR si > 0 OR i < 0 OR b > 0) as multi_or_col from vectortab2k_orc + order by t, si, i, b) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, i, b, (t < 0 OR si > 0 OR i < 0 OR b > 0) as multi_or_col from vectortab2k_orc + order by t, si, i, b) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +c0 +-1983100769 +PREHOOK: query: -- Use a boolean column rather than a column comparison expression. 
+EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, bo, b, (t < 0 OR si > 0 OR bo OR b > 0) as multi_or_col from vectortab2k_orc + order by t, si, bo, b) as q +PREHOOK: type: QUERY +POSTHOOK: query: -- Use a boolean column rather than a column comparison expression. +EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, bo, b, (t < 0 OR si > 0 OR bo OR b > 0) as multi_or_col from vectortab2k_orc + order by t, si, bo, b) as q +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vectortab2k_orc + Statistics: Num rows: 2001 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t (type: tinyint), si (type: smallint), bo (type: boolean), b (type: bigint), ((t < 0) or (si > 0) or bo or (b > 0)) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2001 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: boolean), _col3 (type: bigint) + sort order: ++++ + Statistics: Num rows: 2001 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: boolean) + Reduce Operator Tree: + Select Operator + expressions: hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,KEY.reducesinkkey3,VALUE._col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2001 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, bo, b, (t < 0 OR si > 0 OR bo OR b > 0) as multi_or_col from vectortab2k_orc + order by t, si, bo, b) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, bo, b, (t < 0 OR si > 0 OR bo OR b > 0) as multi_or_col from vectortab2k_orc + order by t, si, bo, b) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +c0 +30392094034 +PREHOOK: query: -- With some filtering +SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 OR si > 0 OR i < 0) as multi_or_col from vectortab2k_orc + where pmod(i,4) = 2 + order by t, si, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +POSTHOOK: query: -- With some filtering +SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 OR si > 0 OR i < 0) as multi_or_col from vectortab2k_orc + where pmod(i,4) = 2 + order by t, si, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +c0 +-29024945908 +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, i, b, (t < 0 OR si > 0 OR i < 0 OR b > 0) as multi_or_col from vectortab2k_orc + where pmod(si,4) < 2 + order by t, si, i, b) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, i, b, (t < 0 OR si > 0 OR i < 0 OR b > 0) as multi_or_col from vectortab2k_orc + where pmod(si,4) < 2 + order by t, si, i, b) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +c0 +47654365207 +PREHOOK: query: SELECT 
sum(hash(*)) FROM + (SELECT t, si, bo, b, (t < 0 OR si > 0 OR bo OR b > 0) as multi_or_col from vectortab2k_orc + where pmod(i,4) = 2 + order by t, si, bo, b) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, bo, b, (t < 0 OR si > 0 OR bo OR b > 0) as multi_or_col from vectortab2k_orc + where pmod(i,4) = 2 + order by t, si, bo, b) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +c0 +-30154518612 +PREHOOK: query: CREATE TABLE scratch_repeat AS SELECT t, si, i, b, bo, 20 as t_repeat, + 9000 as si_repeat, 9233320 as i_repeat, -823823999339992 as b_repeat, false as bo_repeat_false, true as bo_repeat_true FROM vectortab2k +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@vectortab2k +PREHOOK: Output: database:default +PREHOOK: Output: default@scratch_repeat +POSTHOOK: query: CREATE TABLE scratch_repeat AS SELECT t, si, i, b, bo, 20 as t_repeat, + 9000 as si_repeat, 9233320 as i_repeat, -823823999339992 as b_repeat, false as bo_repeat_false, true as bo_repeat_true FROM vectortab2k +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@vectortab2k +POSTHOOK: Output: database:default +POSTHOOK: Output: default@scratch_repeat +POSTHOOK: Lineage: scratch_repeat.b SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: scratch_repeat.b_repeat SIMPLE [] +POSTHOOK: Lineage: scratch_repeat.bo SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: scratch_repeat.bo_repeat_false SIMPLE [] +POSTHOOK: Lineage: scratch_repeat.bo_repeat_true SIMPLE [] +POSTHOOK: Lineage: scratch_repeat.i SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: scratch_repeat.i_repeat SIMPLE [] +POSTHOOK: Lineage: scratch_repeat.si SIMPLE 
[(vectortab2k)vectortab2k.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: scratch_repeat.si_repeat SIMPLE [] +POSTHOOK: Lineage: scratch_repeat.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] +POSTHOOK: Lineage: scratch_repeat.t_repeat SIMPLE [] +t si i b bo t_repeat si_repeat i_repeat b_repeat bo_repeat_false bo_repeat_true +PREHOOK: query: -- The repeated columns ought to create repeated VectorizedRowBatch for those columns. +-- And then when we do a comparison, we should generate a repeated boolean result. +CREATE TABLE vectortab2k_orc_repeat STORED AS ORC AS SELECT * FROM scratch_repeat +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@scratch_repeat +PREHOOK: Output: database:default +PREHOOK: Output: default@vectortab2k_orc_repeat +POSTHOOK: query: -- The repeated columns ought to create repeated VectorizedRowBatch for those columns. +-- And then when we do a comparison, we should generate a repeated boolean result. 
+CREATE TABLE vectortab2k_orc_repeat STORED AS ORC AS SELECT * FROM scratch_repeat +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@scratch_repeat +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vectortab2k_orc_repeat +POSTHOOK: Lineage: vectortab2k_orc_repeat.b SIMPLE [(scratch_repeat)scratch_repeat.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_repeat.b_repeat SIMPLE [(scratch_repeat)scratch_repeat.FieldSchema(name:b_repeat, type:bigint, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_repeat.bo SIMPLE [(scratch_repeat)scratch_repeat.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_repeat.bo_repeat_false SIMPLE [(scratch_repeat)scratch_repeat.FieldSchema(name:bo_repeat_false, type:boolean, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_repeat.bo_repeat_true SIMPLE [(scratch_repeat)scratch_repeat.FieldSchema(name:bo_repeat_true, type:boolean, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_repeat.i SIMPLE [(scratch_repeat)scratch_repeat.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_repeat.i_repeat SIMPLE [(scratch_repeat)scratch_repeat.FieldSchema(name:i_repeat, type:int, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_repeat.si SIMPLE [(scratch_repeat)scratch_repeat.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_repeat.si_repeat SIMPLE [(scratch_repeat)scratch_repeat.FieldSchema(name:si_repeat, type:int, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_repeat.t SIMPLE [(scratch_repeat)scratch_repeat.FieldSchema(name:t, type:tinyint, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_repeat.t_repeat SIMPLE [(scratch_repeat)scratch_repeat.FieldSchema(name:t_repeat, type:int, comment:null), ] +scratch_repeat.t scratch_repeat.si scratch_repeat.i scratch_repeat.b scratch_repeat.bo scratch_repeat.t_repeat scratch_repeat.si_repeat scratch_repeat.i_repeat 
scratch_repeat.b_repeat scratch_repeat.bo_repeat_false scratch_repeat.bo_repeat_true +PREHOOK: query: -- t_repeat < 0 should generate all false. +SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, i, (t_repeat < 0 OR si > 0 OR i < 0) as multi_or_col from vectortab2k_orc_repeat + order by t_repeat, si, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: -- t_repeat < 0 should generate all false. +SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, i, (t_repeat < 0 OR si > 0 OR i < 0) as multi_or_col from vectortab2k_orc_repeat + order by t_repeat, si, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +23302551848 +PREHOOK: query: -- t_repeat > 0 should generate all true. +SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, i, (t_repeat > 0 OR si > 0 OR i < 0) as multi_or_col from vectortab2k_orc_repeat + order by t_repeat, si, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: -- t_repeat > 0 should generate all true. +SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, i, (t_repeat > 0 OR si > 0 OR i < 0) as multi_or_col from vectortab2k_orc_repeat + order by t_repeat, si, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +23302552427 +PREHOOK: query: -- Two repeated false columns at beginning... +SELECT sum(hash(*)) FROM + (SELECT t_repeat, si_repeat, i, (t_repeat < 0 OR si_repeat < 0 OR i < 0) as multi_or_col from vectortab2k_orc_repeat + order by t_repeat, si_repeat, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: -- Two repeated false columns at beginning... 
+SELECT sum(hash(*)) FROM + (SELECT t_repeat, si_repeat, i, (t_repeat < 0 OR si_repeat < 0 OR i < 0) as multi_or_col from vectortab2k_orc_repeat + order by t_repeat, si_repeat, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +15090004243 +PREHOOK: query: -- si_repeat > 0 should generate all true. +SELECT sum(hash(*)) FROM + (SELECT t, si_repeat, i, b, (t < 0 OR si_repeat > 0 OR i < 0 OR b > 0) as multi_or_col from vectortab2k_orc_repeat + order by t, si_repeat, i, b) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: -- si_repeat > 0 should generate all true. +SELECT sum(hash(*)) FROM + (SELECT t, si_repeat, i, b, (t < 0 OR si_repeat > 0 OR i < 0 OR b > 0) as multi_or_col from vectortab2k_orc_repeat + order by t, si_repeat, i, b) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +-3168990878 +PREHOOK: query: -- si_repeat < 0 should generate all false. +SELECT sum(hash(*)) FROM + (SELECT t, si_repeat, i, b, (t < 0 OR si_repeat < 0 OR i < 0 OR b > 0) as multi_or_col from vectortab2k_orc_repeat + order by t, si_repeat, i, b) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: -- si_repeat < 0 should generate all false. +SELECT sum(hash(*)) FROM + (SELECT t, si_repeat, i, b, (t < 0 OR si_repeat < 0 OR i < 0 OR b > 0) as multi_or_col from vectortab2k_orc_repeat + order by t, si_repeat, i, b) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +-3168991048 +PREHOOK: query: -- Use a boolean column rather than a column comparison expression. 
+SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, bo_repeat_false, b, (t_repeat > 0 OR si > 0 OR bo_repeat_false OR b > 0) as multi_or_col from vectortab2k_orc_repeat + order by t_repeat, si, bo_repeat_false, b) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: -- Use a boolean column rather than a column comparison expression. +SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, bo_repeat_false, b, (t_repeat > 0 OR si > 0 OR bo_repeat_false OR b > 0) as multi_or_col from vectortab2k_orc_repeat + order by t_repeat, si, bo_repeat_false, b) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +6670542465 +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, bo_repeat_true, b, (t_repeat > 0 OR si > 0 OR bo_repeat_true OR b > 0) as multi_or_col from vectortab2k_orc_repeat + order by t_repeat, si, bo_repeat_true, b) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, bo_repeat_true, b, (t_repeat > 0 OR si > 0 OR bo_repeat_true OR b > 0) as multi_or_col from vectortab2k_orc_repeat + order by t_repeat, si, bo_repeat_true, b) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +6672464465 +PREHOOK: query: -- With some filtering +SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, i, (t_repeat < 0 OR si > 0 OR i < 0) as multi_or_col from vectortab2k_orc_repeat + where pmod(i,4) > 1 + order by t_repeat, si, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: -- With some filtering +SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, i, (t_repeat < 0 OR si > 0 OR i < 0) as multi_or_col from vectortab2k_orc_repeat + where pmod(i,4) > 1 + order by t_repeat, si, i) 
as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +-22560313971 +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, i, (t_repeat > 0 OR si > 0 OR i < 0) as multi_or_col from vectortab2k_orc_repeat + where pmod(si,4) > 1 + order by t_repeat, si, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, i, (t_repeat > 0 OR si > 0 OR i < 0) as multi_or_col from vectortab2k_orc_repeat + where pmod(si,4) > 1 + order by t_repeat, si, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +18193271981 +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_repeat, si_repeat, i, (t_repeat < 0 OR si_repeat < 0 OR i < 0) as multi_or_col from vectortab2k_orc_repeat + where pmod(i,4) > 1 + order by t_repeat, si_repeat, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_repeat, si_repeat, i, (t_repeat < 0 OR si_repeat < 0 OR i < 0) as multi_or_col from vectortab2k_orc_repeat + where pmod(i,4) > 1 + order by t_repeat, si_repeat, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +-24026515302 +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si_repeat, i, b, (t < 0 OR si_repeat > 0 OR i < 0 OR b > 0) as multi_or_col from vectortab2k_orc_repeat + where pmod(t,4) > 1 + order by t, si_repeat, i, b) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si_repeat, i, b, (t < 0 OR si_repeat > 0 OR i < 0 OR b > 0) as multi_or_col from vectortab2k_orc_repeat + where pmod(t,4) > 1 + order by t, si_repeat, i, b) as q 
+POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +-16534191176 +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si_repeat, i, b, (t < 0 OR si_repeat < 0 OR i < 0 OR b > 0) as multi_or_col from vectortab2k_orc_repeat + where pmod(b,4) > 1 + order by t, si_repeat, i, b) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si_repeat, i, b, (t < 0 OR si_repeat < 0 OR i < 0 OR b > 0) as multi_or_col from vectortab2k_orc_repeat + where pmod(b,4) > 1 + order by t, si_repeat, i, b) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +-23243914195 +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, bo_repeat_false, b, (t_repeat > 0 OR si > 0 OR bo_repeat_false OR b > 0) as multi_or_col from vectortab2k_orc_repeat + where pmod(si,4) > 1 + order by t_repeat, si, bo_repeat_false, b) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, bo_repeat_false, b, (t_repeat > 0 OR si > 0 OR bo_repeat_false OR b > 0) as multi_or_col from vectortab2k_orc_repeat + where pmod(si,4) > 1 + order by t_repeat, si, bo_repeat_false, b) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +15269030422 +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, bo_repeat_true, b, (t_repeat > 0 OR si > 0 OR bo_repeat_true OR b > 0) as multi_or_col from vectortab2k_orc_repeat + where pmod(si,4) < 2 + order by t_repeat, si, bo_repeat_true, b) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_repeat, si, bo_repeat_true, b, (t_repeat > 
0 OR si > 0 OR bo_repeat_true OR b > 0) as multi_or_col from vectortab2k_orc_repeat + where pmod(si,4) < 2 + order by t_repeat, si, bo_repeat_true, b) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_repeat +#### A masked pattern was here #### +c0 +-17101357469 +PREHOOK: query: CREATE TABLE scratch_null AS SELECT t, si, i, b, bo, + cast(null as tinyint) as t_null, cast(null as smallint) as si_null, cast(null as int) as i_null, cast(null as bigint) as b_null, cast(null as boolean) as bo_null FROM vectortab2k +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@vectortab2k +PREHOOK: Output: database:default +PREHOOK: Output: default@scratch_null +POSTHOOK: query: CREATE TABLE scratch_null AS SELECT t, si, i, b, bo, + cast(null as tinyint) as t_null, cast(null as smallint) as si_null, cast(null as int) as i_null, cast(null as bigint) as b_null, cast(null as boolean) as bo_null FROM vectortab2k +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@vectortab2k +POSTHOOK: Output: database:default +POSTHOOK: Output: default@scratch_null +POSTHOOK: Lineage: scratch_null.b SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: scratch_null.b_null EXPRESSION [] +POSTHOOK: Lineage: scratch_null.bo SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: scratch_null.bo_null EXPRESSION [] +POSTHOOK: Lineage: scratch_null.i SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: scratch_null.i_null EXPRESSION [] +POSTHOOK: Lineage: scratch_null.si SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: scratch_null.si_null EXPRESSION [] +POSTHOOK: Lineage: scratch_null.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] +POSTHOOK: Lineage: scratch_null.t_null EXPRESSION [] +t si i b bo t_null si_null i_null b_null 
bo_null +PREHOOK: query: -- The nulled columns ought to create repeated null VectorizedRowBatch for those columns. +CREATE TABLE vectortab2k_orc_null STORED AS ORC AS SELECT * FROM scratch_null +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@scratch_null +PREHOOK: Output: database:default +PREHOOK: Output: default@vectortab2k_orc_null +POSTHOOK: query: -- The nulled columns ought to create repeated null VectorizedRowBatch for those columns. +CREATE TABLE vectortab2k_orc_null STORED AS ORC AS SELECT * FROM scratch_null +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@scratch_null +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vectortab2k_orc_null +POSTHOOK: Lineage: vectortab2k_orc_null.b SIMPLE [(scratch_null)scratch_null.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_null.b_null SIMPLE [(scratch_null)scratch_null.FieldSchema(name:b_null, type:bigint, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_null.bo SIMPLE [(scratch_null)scratch_null.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_null.bo_null SIMPLE [(scratch_null)scratch_null.FieldSchema(name:bo_null, type:boolean, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_null.i SIMPLE [(scratch_null)scratch_null.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_null.i_null SIMPLE [(scratch_null)scratch_null.FieldSchema(name:i_null, type:int, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_null.si SIMPLE [(scratch_null)scratch_null.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_null.si_null SIMPLE [(scratch_null)scratch_null.FieldSchema(name:si_null, type:smallint, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_null.t SIMPLE [(scratch_null)scratch_null.FieldSchema(name:t, type:tinyint, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc_null.t_null SIMPLE 
[(scratch_null)scratch_null.FieldSchema(name:t_null, type:tinyint, comment:null), ] +scratch_null.t scratch_null.si scratch_null.i scratch_null.b scratch_null.bo scratch_null.t_null scratch_null.si_null scratch_null.i_null scratch_null.b_null scratch_null.bo_null +PREHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null is null OR si > 0 OR i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si, i) as q +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null is null OR si > 0 OR i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si, i) as q +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vectortab2k_orc_null + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t_null (type: tinyint), si (type: smallint), i (type: int), (t_null is null or (si > 0) or (i < 0)) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int) + sort order: +++ + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: boolean) + Reduce Operator Tree: + Select Operator + expressions: hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,VALUE._col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: 
Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null is null OR si > 0 OR i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null is null OR si > 0 OR i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +c0 +26405879723 +PREHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null < 0 OR si > 0 OR i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si, i) as q +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null < 0 OR si > 0 OR i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si, i) as q +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vectortab2k_orc_null + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t_null (type: tinyint), si (type: smallint), i (type: int), ((t_null < 0) or (si > 0) or (i < 0)) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint), _col1 (type: 
smallint), _col2 (type: int) + sort order: +++ + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: boolean) + Reduce Operator Tree: + Select Operator + expressions: hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,VALUE._col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null < 0 OR si > 0 OR i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null < 0 OR si > 0 OR i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +c0 +26405879144 +PREHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i, (t_null < 0 OR si_null > 0 OR i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si_null, i) as q +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i, (t_null < 0 OR si_null > 0 OR i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, 
si_null, i) as q +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vectortab2k_orc_null + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t_null (type: tinyint), si_null (type: smallint), i (type: int), ((t_null < 0) or (si_null > 0) or (i < 0)) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int) + sort order: +++ + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: boolean) + Reduce Operator Tree: + Select Operator + expressions: hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,VALUE._col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i, (t_null < 0 OR si_null > 0 OR i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si_null, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern 
was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i, (t_null < 0 OR si_null > 0 OR i < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si_null, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +c0 +18075200723 +PREHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i_null, (t_null < 0 OR si_null > 0 OR i_null < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si_null, i_null) as q +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i_null, (t_null < 0 OR si_null > 0 OR i_null < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si_null, i_null) as q +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vectortab2k_orc_null + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t_null (type: tinyint), si_null (type: smallint), i_null (type: int), ((t_null < 0) or (si_null > 0) or (i_null < 0)) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int) + sort order: +++ + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: boolean) + Reduce Operator Tree: + Select Operator + expressions: hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,VALUE._col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 45620 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: complete + outputColumnNames: _col0 + 
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i_null, (t_null < 0 OR si_null > 0 OR i_null < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si_null, i_null) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i_null, (t_null < 0 OR si_null > 0 OR i_null < 0) as multi_and_col from vectortab2k_orc_null + order by t_null, si_null, i_null) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +c0 +0 +PREHOOK: query: -- With some filtering +SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null is null OR si > 0 OR i < 0) as multi_and_col from vectortab2k_orc_null + where pmod(i,4) = 2 + order by t_null, si, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +POSTHOOK: query: -- With some filtering +SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null is null OR si > 0 OR i < 0) as multi_and_col from vectortab2k_orc_null + where pmod(i,4) = 2 + order by t_null, si, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +c0 +-29059205491 +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null < 0 OR si > 0 OR i < 0) as multi_and_col from vectortab2k_orc_null + where pmod(i,4) = 3 + order by t_null, si, i) as q +PREHOOK: 
type: QUERY +PREHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si, i, (t_null < 0 OR si > 0 OR i < 0) as multi_and_col from vectortab2k_orc_null + where pmod(i,4) = 3 + order by t_null, si, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +c0 +5954312170 +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i, (t_null < 0 OR si_null > 0 OR i < 0) as multi_and_col from vectortab2k_orc_null + where pmod(i,4) = 0 + order by t_null, si_null, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i, (t_null < 0 OR si_null > 0 OR i < 0) as multi_and_col from vectortab2k_orc_null + where pmod(i,4) = 0 + order by t_null, si_null, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +c0 +-19001666507 +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i_null, (t_null < 0 OR si_null > 0 OR i_null < 0) as multi_and_col from vectortab2k_orc_null + where pmod(i,4) != 2 + order by t_null, si_null, i_null) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i_null, (t_null < 0 OR si_null > 0 OR i_null < 0) as multi_and_col from vectortab2k_orc_null + where pmod(i,4) != 2 + order by t_null, si_null, i_null) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc_null +#### A masked pattern was here #### +c0 +0