diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 3f95be2..b80dae7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -479,7 +479,7 @@ public VectorExpression getVectorExpression(ExprNodeDesc exprDesc, Mode mode) th mode); } if (ve == null) { - throw new HiveException("Could not vectorize expression: "+exprDesc.getName()); + throw new HiveException("Could not vectorize expression: "+exprDesc.toString()); } if (LOG.isDebugEnabled()) { LOG.debug("Input Expression = " + exprDesc.getTypeInfo() @@ -996,36 +996,53 @@ private VectorExpression getVectorExpressionForUdf(GenericUDF genericeUdf, int numChildren = (childExpr == null) ? 0 : childExpr.size(); - if (numChildren > 2 && genericeUdf != null && mode == Mode.FILTER && + if (genericeUdf != null && ((genericeUdf instanceof GenericUDFOPOr) || (genericeUdf instanceof GenericUDFOPAnd))) { // Special case handling for Multi-OR and Multi-AND. + boolean ok = true; // Assume. for (int i = 0; i < numChildren; i++) { ExprNodeDesc child = childExpr.get(i); String childTypeString = child.getTypeString(); - if (childTypeString == null) { - throw new HiveException("Null child type name string"); - } TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(childTypeString); Type columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo); if (columnVectorType != ColumnVector.Type.LONG){ - return null; + ok = false; + break; } if (!(child instanceof ExprNodeGenericFuncDesc) && !(child instanceof ExprNodeColumnDesc)) { - return null; + ok = false; + break; } } - Class vclass; - if (genericeUdf instanceof GenericUDFOPOr) { - vclass = FilterExprOrExpr.class; - } else if (genericeUdf instanceof GenericUDFOPAnd) { - vclass = FilterExprAndExpr.class; + if (ok) { + // UNDONE: Are the limitation above too strict??? + Class vclass; + if (genericeUdf instanceof GenericUDFOPOr) { + if (mode == Mode.PROJECTION) { + vclass = ColMultiOrCol.class; + } else { + vclass = FilterExprOrExpr.class; + } + } else if (genericeUdf instanceof GenericUDFOPAnd) { + if (mode == Mode.PROJECTION) { + vclass = ColMultiAndCol.class; + } else { + vclass = FilterExprAndExpr.class; + } + } else { + throw new RuntimeException("Unexpected multi-child UDF"); + } + Mode childrenMode = getChildrenMode(mode, udfClass); + if (mode == Mode.PROJECTION) { + return createVectorMultiAndOrProjectionExpr(vclass, childExpr, childrenMode, returnType); + } else { + return createVectorExpression(vclass, childExpr, childrenMode, returnType); + } } else { - throw new RuntimeException("Unexpected multi-child UDF"); + // Fall below and evaluate regular vectorization expression. 
} - Mode childrenMode = getChildrenMode(mode, udfClass); - return createVectorExpression(vclass, childExpr, childrenMode, returnType); } if (numChildren > VectorExpressionDescriptor.MAX_NUM_ARGUMENTS) { return null; @@ -1064,6 +1081,37 @@ private VectorExpression getVectorExpressionForUdf(GenericUDF genericeUdf, return createVectorExpression(vclass, childExpr, childrenMode, returnType); } + private void determineChildrenVectorExprAndArguments(Class vectorClass, + List childExpr, int numChildren, Mode childrenMode, + VectorExpression.Type [] inputTypes, List children, Object[] arguments) + throws HiveException { + for (int i = 0; i < numChildren; i++) { + ExprNodeDesc child = childExpr.get(i); + String undecoratedName = getUndecoratedName(child.getTypeInfo().getTypeName()); + inputTypes[i] = VectorExpression.Type.getValue(undecoratedName); + if (inputTypes[i] == VectorExpression.Type.OTHER){ + throw new HiveException("No vector type for " + vectorClass.getSimpleName() + " argument #" + i + " type name " + undecoratedName); + } + if (child instanceof ExprNodeGenericFuncDesc) { + VectorExpression vChild = getVectorExpression(child, childrenMode); + children.add(vChild); + arguments[i] = vChild.getOutputColumn(); + } else if (child instanceof ExprNodeColumnDesc) { + int colIndex = getInputColumnIndex((ExprNodeColumnDesc) child); + if (childrenMode == Mode.FILTER) { + // In filter mode, the column must be a boolean + children.add(new SelectColumnIsTrue(colIndex)); + } + arguments[i] = colIndex; + } else if (child instanceof ExprNodeConstantDesc) { + Object scalarValue = getVectorTypeScalarValue((ExprNodeConstantDesc) child); + arguments[i] = (null == scalarValue) ? getConstantVectorExpression(null, child.getTypeInfo(), childrenMode) : scalarValue; + } else { + throw new HiveException("Cannot handle expression type: " + child.getClass().getSimpleName()); + } + } + } + private VectorExpression createVectorExpression(Class vectorClass, List childExpr, Mode childrenMode, TypeInfo returnType) throws HiveException { int numChildren = childExpr == null ? 0: childExpr.size(); @@ -1071,31 +1119,41 @@ private VectorExpression createVectorExpression(Class vectorClass, List children = new ArrayList(); Object[] arguments = new Object[numChildren]; try { + determineChildrenVectorExprAndArguments(vectorClass, childExpr, numChildren, childrenMode, + inputTypes, children, arguments); + VectorExpression vectorExpression = instantiateExpression(vectorClass, returnType, arguments); + vectorExpression.setInputTypes(inputTypes); + if ((vectorExpression != null) && !children.isEmpty()) { + vectorExpression.setChildExpressions(children.toArray(new VectorExpression[0])); + } + return vectorExpression; + } catch (Exception ex) { + throw new HiveException(ex); + } finally { + for (VectorExpression ve : children) { + ocm.freeOutputColumn(ve.getOutputColumn()); + } + } + } + + private VectorExpression createVectorMultiAndOrProjectionExpr(Class vectorClass, + List childExpr, Mode childrenMode, TypeInfo returnType) throws HiveException { + int numChildren = childExpr == null ? 0: childExpr.size(); + VectorExpression.Type [] inputTypes = new VectorExpression.Type[numChildren]; + List children = new ArrayList(); + Object[] arguments = new Object[numChildren]; + try { + determineChildrenVectorExprAndArguments(vectorClass, childExpr, numChildren, childrenMode, + inputTypes, children, arguments); + + // For Multi-AND/OR, transform the arguments -- column indices into an array of int. 
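+      // ColMultiAndCol and ColMultiOrCol take their input columns as a single int[]
+      // constructor argument, so the per-child column indices gathered above are
+      // repacked into one array before the expression is instantiated.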
+ int[] colNums = new int[numChildren]; for (int i = 0; i < numChildren; i++) { - ExprNodeDesc child = childExpr.get(i); - String undecoratedName = getUndecoratedName(child.getTypeInfo().getTypeName()); - inputTypes[i] = VectorExpression.Type.getValue(undecoratedName); - if (inputTypes[i] == VectorExpression.Type.OTHER){ - throw new HiveException("No vector type for " + vectorClass.getSimpleName() + " argument #" + i + " type name " + undecoratedName); - } - if (child instanceof ExprNodeGenericFuncDesc) { - VectorExpression vChild = getVectorExpression(child, childrenMode); - children.add(vChild); - arguments[i] = vChild.getOutputColumn(); - } else if (child instanceof ExprNodeColumnDesc) { - int colIndex = getInputColumnIndex((ExprNodeColumnDesc) child); - if (childrenMode == Mode.FILTER) { - // In filter mode, the column must be a boolean - children.add(new SelectColumnIsTrue(colIndex)); - } - arguments[i] = colIndex; - } else if (child instanceof ExprNodeConstantDesc) { - Object scalarValue = getVectorTypeScalarValue((ExprNodeConstantDesc) child); - arguments[i] = (null == scalarValue) ? getConstantVectorExpression(null, child.getTypeInfo(), childrenMode) : scalarValue; - } else { - throw new HiveException("Cannot handle expression type: " + child.getClass().getSimpleName()); - } + colNums[i] = (Integer) arguments[i]; } + arguments = new Object[1]; + arguments[0] = colNums; + VectorExpression vectorExpression = instantiateExpression(vectorClass, returnType, arguments); vectorExpression.setInputTypes(inputTypes); if ((vectorExpression != null) && !children.isEmpty()) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColMultiAndCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColMultiAndCol.java new file mode 100644 index 0000000..c689731 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColMultiAndCol.java @@ -0,0 +1,438 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.util.Arrays; + +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +import com.google.common.base.Preconditions; + +/** + * Evaluate AND of 3 or more boolean columns and store result in the output boolean column. 
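+ *
+ * The result follows SQL three-valued logic for AND:
+ *   Any false operand                 --> false
+ *   All operands true                 --> true
+ *   Otherwise (no false, some null)   --> null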
+ */ +public class ColMultiAndCol extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int[] colNums; + private int outputColumn; + private int[] andSelected; + private boolean[] intermediateNulls; + + public ColMultiAndCol(int[] colNums, int outputColumn) { + this(); + this.colNums = colNums; + this.outputColumn = outputColumn; + andSelected = new int[VectorizedRowBatch.DEFAULT_SIZE]; + intermediateNulls = new boolean[VectorizedRowBatch.DEFAULT_SIZE]; + } + + public ColMultiAndCol() { + super(); + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + Preconditions.checkState(colNums.length >= 2); + + final int n = batch.size; + if (n <= 0) { + // Nothing to do + return; + } + + if (childExpressions != null) { + // Evaluate first child expression. Other child are conditionally evaluated later + // based on whether there is still the possibility of any true and/or null results and + // only evaluated on current true and/or null rows. + VectorExpression ve = childExpressions[0]; + Preconditions.checkState(ve.getOutputColumn() == colNums[0]); + ve.evaluate(batch); + } + + int[] sel = batch.selected; + + LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn]; + long[] outputVector = outV.vector; + + /** + * Null processing complicates the algorithm here for Multi-AND. + * + * All true --> true + * 0 or more true with 1 or more null --> result = null + * Any false --> false + * + * We remember nulls in the intermediateNulls array as we go along. + * Entries in andSelected are for rows that are either true and/or null. + * + * The flags andRepeating and andRepeatingIsNull track if the whole batch so far + * is true or null. + */ + boolean andRepeating = false; + boolean andRepeatingIsNull = false; + int andSel = 0; + Arrays.fill(intermediateNulls, 0, VectorizedRowBatch.DEFAULT_SIZE, false); + + outV.reset(); + + LongColumnVector firstColVector = (LongColumnVector) batch.cols[colNums[0]]; + long[] firstVector = firstColVector.vector; + + if (firstColVector.isRepeating) { + if (firstColVector.noNulls || !firstColVector.isNull[0]) { + if (firstVector[0] == 0) { + outV.isRepeating = true; + outputVector[0] = 0; + return; + } + } else { + Preconditions.checkState(firstColVector.isNull[0]); + + // This stage null still could have influence even if andRepeating is also a null. + andRepeatingIsNull = true; + } + andRepeating = true; + } else if (firstColVector.noNulls) { + if (batch.selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + if (firstVector[i] == 1) { + andSelected[andSel++] = i; + } + } + } else { + for (int i = 0; i != n; i++) { + if (firstVector[i] == 1) { + andSelected[andSel++] = i; + } + } + } + } else { + if (batch.selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + if (firstColVector.isNull[i]) { + intermediateNulls[i] = true; + andSelected[andSel++] = i; + } else if (firstVector[i] == 1) { + andSelected[andSel++] = i; + } + } + } else { + for (int i = 0; i != n; i++) { + if (firstColVector.isNull[i]) { + intermediateNulls[i] = true; + andSelected[andSel++] = i; + } else if (firstVector[i] == 1) { + andSelected[andSel++] = i; + } + } + } + } + + // Process input #2 and above. + + int colNum = 1; + do { + if (!andRepeating && andSel == 0) { + + // Nothing is true (how philosophical!). + break; + } + + if (childExpressions != null) { + if (andRepeating) { + // We need to start with a full evaluate. 
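+          // While andRepeating is set, no per-row selection has been narrowed yet, so the
+          // child expression is evaluated over the batch's current selected rows rather
+          // than over the andSelected subset.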
+ VectorExpression ve = childExpressions[colNum]; + Preconditions.checkState(ve.getOutputColumn() == colNums[colNum]); + ve.evaluate(batch); + } else { + // Evaluate next child expression. + // But only andSelected (current true or true with nulls rows). + boolean saveSelectedInUse = batch.selectedInUse; + int[] saveSelected = sel; + batch.selectedInUse = true; + batch.selected = andSelected; + + VectorExpression ve = childExpressions[colNum]; + Preconditions.checkState(ve.getOutputColumn() == colNums[colNum]); + ve.evaluate(batch); + + batch.selectedInUse = saveSelectedInUse; + batch.selected = saveSelected; + } + } + + LongColumnVector nextColVector = (LongColumnVector) batch.cols[colNums[colNum]]; + long[] nextVector = nextColVector.vector; + + if (andRepeating) { + if (nextColVector.isRepeating) { + if (nextColVector.noNulls || !nextColVector.isNull[0]) { + if (nextVector[0] == 0) { + outV.isRepeating = true; + outputVector[0] = 0; + return; + } else { + // andRepeatingIsNull goes on to the next stage, too. + } + } else { + Preconditions.checkState(nextColVector.isNull[0]); + + // This stage null still could have influence even if andRepeating is also a null. + andRepeatingIsNull = true; + } + // Continue as andRepeating + } else if (nextColVector.noNulls) { + + // Switch away from andRepeating. + + Preconditions.checkState(andSel == 0); + andRepeating = false; + + if (andRepeatingIsNull) { + + // We only set intermediateNulls when the next row is true. + // The andSelected array will represent rows with true values and at least one null. + // If all future rounds are true, then the null will cause the result to be null. + if (batch.selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + if (nextVector[i] == 1) { + intermediateNulls[i] = true; + andSelected[andSel++] = i; + } + } + } else { + for (int i = 0; i != n; i++) { + if (nextVector[i] == 1) { + intermediateNulls[i] = true; + andSelected[andSel++] = i; + } + } + } + andRepeatingIsNull = false; + } else { + + // Previous rounds were all true. + if (batch.selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + if (nextVector[i] == 1) { + andSelected[andSel++] = i; + } + } + } else { + for (int i = 0; i != n; i++) { + if (nextVector[i] == 1) { + andSelected[andSel++] = i; + } + } + } + } + } else { + + // Can have nulls. + + // Switch away from andRepeating. + + Preconditions.checkState(andSel == 0); + andRepeating = false; + + if (andRepeatingIsNull) { + + // We only set intermediateNulls when the next row is true. + // The andSelected array will represent rows with true values and at least one null. + // If all future rounds are true, then the null will cause the result to be null. + if (batch.selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + if (nextColVector.isNull[i] || nextVector[i] == 1) { + intermediateNulls[i] = true; + andSelected[andSel++] = i; + } + } + } else { + for (int i = 0; i != n; i++) { + if (nextColVector.isNull[i] || nextVector[i] == 1) { + intermediateNulls[i] = true; + andSelected[andSel++] = i; + } + } + } + andRepeatingIsNull = false; + } else { + + // Previous rounds were all true. 
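+            // A row stays live if this column is true for it, or if it is null here; the
+            // null is noted in intermediateNulls and will make the result null unless some
+            // later column is false for that row.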
+            if (batch.selectedInUse) {
+              for (int j = 0; j != n; j++) {
+                int i = sel[j];
+                if (nextColVector.isNull[i]) {
+                  intermediateNulls[i] = true;
+                  andSelected[andSel++] = i;
+                } else if (nextVector[i] == 1) {
+                  andSelected[andSel++] = i;
+                }
+              }
+            } else {
+              for (int i = 0; i != n; i++) {
+                if (nextColVector.isNull[i]) {
+                  intermediateNulls[i] = true;
+                  andSelected[andSel++] = i;
+                } else if (nextVector[i] == 1) {
+                  andSelected[andSel++] = i;
+                }
+              }
+            }
+          }
+        }
+      } else {
+
+        // The andSelected array contains the rows that are currently true and possibly
+        // had some nulls.
+
+        if (nextColVector.isRepeating) {
+          if (nextColVector.noNulls) {
+            if (nextVector[0] == 0) {
+              outV.isRepeating = true;
+              outputVector[0] = 0;
+              return;
+            }
+          } else if (nextColVector.isNull[0]) {
+            // Repeating null has influence over current andSelected entries.
+            for (int j = 0; j < andSel; j++) {
+              int i = andSelected[j];
+              intermediateNulls[i] = true;
+            }
+          }
+          // All prior rows in andSelected remain because they are true. They may have nulls.
+        } else if (nextColVector.noNulls) {
+
+          // New round eliminates false rows.
+          int newSel = 0;
+          for (int j = 0; j < andSel; j++) {
+            int i = andSelected[j];
+            if (nextVector[i] == 1) {
+              andSelected[newSel++] = i;
+            }
+          }
+          andSel = newSel;
+        } else {
+
+          // Can have nulls.
+
+          // New round keeps true rows, notes nulls, and eliminates false rows.
+          int newSel = 0;
+          for (int j = 0; j < andSel; j++) {
+            int i = andSelected[j];
+            if (nextColVector.isNull[i]) {
+              // Keep the entry -- it has been true so far and now has at least one null.
+              intermediateNulls[i] = true;
+              andSelected[newSel++] = i;
+            } else if (nextVector[i] == 1) {
+              // The entry is still true. It may have earlier nulls.
+              andSelected[newSel++] = i;
+            }
+          }
+          andSel = newSel;
+        }
+      }
+    } while (++colNum < colNums.length);
+
+    // Produce final result.
+
+    if (andRepeating) {
+      outV.isRepeating = true;
+      if (andRepeatingIsNull) {
+        outV.noNulls = false;
+        outV.isNull[0] = true;
+      } else {
+        outputVector[0] = 0;
+      }
+    } else if (andSel == 0) {
+      outV.isRepeating = true;
+      outputVector[0] = 0;
+    } else {
+      // Ok, some rows were true and/or null throughout.
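+      // Walk the batch in order: rows present in andSelected become 1, or null when a
+      // null was recorded in intermediateNulls; every other row saw a false value at
+      // some stage and becomes 0.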
+ int andIndex = 0; + if (batch.selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + if (andIndex < andSel && andSelected[andIndex] == i) { + if (intermediateNulls[i]) { + outV.noNulls = false; + outV.isNull[i] = true; + } else { + outputVector[i] = 1; + } + andIndex++; + } else { + outputVector[i] = 0; + } + } + Preconditions.checkState(andIndex == andSel); + } else { + Arrays.fill(outputVector, 0, n, 0); + for (int i = 0; i != n; i++) { + if (andIndex < andSel && andSelected[andIndex] == i) { + if (intermediateNulls[i]) { + outV.noNulls = false; + outV.isNull[i] = true; + } else { + outputVector[i] = 1; + } + andIndex++; + } + } + Preconditions.checkState(andIndex == andSel); + } + } + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "boolean"; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("long"), + VectorExpressionDescriptor.ArgumentType.getType("long")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColMultiOrCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColMultiOrCol.java new file mode 100644 index 0000000..57f5db0 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColMultiOrCol.java @@ -0,0 +1,459 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.util.Arrays; + +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +import com.google.common.base.Preconditions; + +/** + * This class performs OR expression on two input columns and stores, + * the boolean output in a separate output column. The boolean values + * are supposed to be represented as 0/1 in a long vector. 
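+ *
+ * (The expression accepts two or more boolean input columns.)
+ *
+ * The result follows SQL three-valued logic for OR:
+ *   Any true operand                  --> true
+ *   All operands false                --> false
+ *   Otherwise (no true, some null)    --> null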
+ */ +public class ColMultiOrCol extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int[] colNums; + private int outputColumn; + private int[] orSelected; + private boolean[] intermediateNulls; + + public ColMultiOrCol(int[] colNums, int outputColumn) { + this(); + this.colNums = colNums; + this.outputColumn = outputColumn; + orSelected = new int[VectorizedRowBatch.DEFAULT_SIZE]; + intermediateNulls = new boolean[VectorizedRowBatch.DEFAULT_SIZE]; + } + + public ColMultiOrCol() { + super(); + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + Preconditions.checkState(colNums.length >= 2); + + final int n = batch.size; + if (n <= 0) { + // Nothing to do + return; + } + + if (childExpressions != null) { + // Evaluate first child expression. Other child are conditionally evaluated later + // based on whether there is still the possibility of any true and/or null results and + // only evaluated on current false and/or null rows. + VectorExpression ve = childExpressions[0]; + Preconditions.checkState(ve.getOutputColumn() == colNums[0]); + ve.evaluate(batch); + } + + int[] sel = batch.selected; + + LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn]; + long[] outputVector = outV.vector; + + /** + * Null processing complicates the algorithm here for Multi-OR. + * + * Any true --> true + * 0 or more false with 1 or more null --> result = null + * All false --> false + * + * We remember true as 1 in the outputVector because once set, it doesn't get unset. + * We remember nulls in the intermediateNulls so we can later ignore it if a row is true. + * Entries in orSelected are for rows that are either false and/or null. + * + * The flags orRepeating, and orRepeatingHasNulls track if the whole batch so far + * false and/or null. We are only tracking if there have been some nulls here. + */ + boolean orRepeating = false; + boolean orRepeatingHasNulls = false; + int orSel = 0; + Arrays.fill(intermediateNulls, 0, VectorizedRowBatch.DEFAULT_SIZE, false); + + // We set true rows as we find them. + if (batch.selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = 0; + } + } else { + Arrays.fill(outputVector, 0, n, 0); + } + + outV.reset(); + + LongColumnVector firstColVector = (LongColumnVector) batch.cols[colNums[0]]; + long[] firstVector = firstColVector.vector; + + if (firstColVector.isRepeating) { + if (firstColVector.noNulls || !firstColVector.isNull[0]) { + if (firstVector[0] == 1) { + outV.isRepeating = true; + outputVector[0] = 1; + return; + } + } else { + Preconditions.checkState(firstColVector.isNull[0]); + + // This stage null still could have influence even if andRepeating is also a null. 
+ orRepeatingHasNulls = true; + } + orRepeating = true; + } else if (firstColVector.noNulls) { + if (batch.selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + if (firstVector[i] == 0) { + orSelected[orSel++] = i; + } else { + outputVector[i] = 1; + } + } + } else { + for (int i = 0; i != n; i++) { + if (firstVector[i] == 0) { + orSelected[orSel++] = i; + } else { + outputVector[i] = 1; + } + } + } + } else { + if (batch.selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + if (firstColVector.isNull[i]) { + intermediateNulls[i] = true; + orSelected[orSel++] = i; + } else if (firstVector[i] == 0) { + orSelected[orSel++] = i; + } else { + outputVector[i] = 1; + } + } + } else { + for (int i = 0; i != n; i++) { + if (firstColVector.isNull[i]) { + intermediateNulls[i] = true; + orSelected[orSel++] = i; + } else if (firstVector[i] == 0) { + orSelected[orSel++] = i; + } else { + outputVector[i] = 1; + } + } + } + } + + // Process input #2 and above. + + int colNum = 1; + do { + if (!orRepeating && orSel == 0) { + + // Everything is true (how philosophical!). + break; + } + + if (childExpressions != null) { + if (orRepeating) { + // We need to start with a full evaluate. + VectorExpression ve = childExpressions[colNum]; + Preconditions.checkState(ve.getOutputColumn() == colNums[colNum]); + ve.evaluate(batch); + } else { + // Evaluate next child expression. + // But only orSelected (current false or false with nulls rows). + boolean saveSelectedInUse = batch.selectedInUse; + int[] saveSelected = sel; + batch.selectedInUse = true; + batch.selected = orSelected; + + VectorExpression ve = childExpressions[colNum]; + Preconditions.checkState(ve.getOutputColumn() == colNums[colNum]); + ve.evaluate(batch); + + batch.selectedInUse = saveSelectedInUse; + batch.selected = saveSelected; + } + } + + LongColumnVector nextColVector = (LongColumnVector) batch.cols[colNums[colNum]]; + long[] nextVector = nextColVector.vector; + + if (orRepeating) { + + // The orRepeating flag means the whole batch is false or nulls. + + if (nextColVector.isRepeating) { + if (nextColVector.noNulls || !nextColVector.isNull[0]) { + if (nextVector[0] == 1) { + outV.isRepeating = true; + outputVector[0] = 1; + return; + } else { + // The orRepeatingHasNulls flag goes on to the next stage, too. + } + } else { + Preconditions.checkState(nextColVector.isNull[0]); + + // This stage null still could have influence even if andRepeating is also a null. + orRepeatingHasNulls = true; + } + // Continue as orRepeating + } else if (nextColVector.noNulls) { + + // Switch away from orRepeating. + Preconditions.checkState(orSel == 0); + orRepeating = false; + + if (orRepeatingHasNulls) { + + // We only set intermediateNulls when the next row is false. + // The orSelected array will represent rows with false values and at least one null. + // If all future rounds are false, then the null will cause the result to be null. + if (batch.selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + if (nextVector[i] == 0) { + intermediateNulls[i] = true; + orSelected[orSel++] = i; + } else { + outputVector[i] = 1; + } + } + } else { + for (int i = 0; i != n; i++) { + if (nextVector[i] == 0) { + intermediateNulls[i] = true; + orSelected[orSel++] = i; + } else { + outputVector[i] = 1; + } + } + } + orRepeatingHasNulls = false; + } else { + + // Previous rounds were all false. 
+ if (batch.selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + if (nextVector[i] == 0) { + orSelected[orSel++] = i; + } else { + outputVector[i] = 1; + } + } + } else { + for (int i = 0; i != n; i++) { + if (nextVector[i] == 0) { + orSelected[orSel++] = i; + } else { + outputVector[i] = 1; + } + } + } + } + } else { + + // Can have nulls. + + // Switch away from orRepeating. + Preconditions.checkState(orSel == 0); + orRepeating = false; + + if (orRepeatingHasNulls) { + + // We only set intermediateNulls when the next row is false. + // The orSelected array will represent rows with false values and at least one null. + // If all future rounds are false, then the null will cause the result to be null. + if (batch.selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + if (nextColVector.isNull[i] || nextVector[i] == 0) { + intermediateNulls[i] = true; + orSelected[orSel++] = i; + } else { + outputVector[i] = 1; + } + } + } else { + for (int i = 0; i != n; i++) { + if (nextColVector.isNull[i] || nextVector[i] == 0) { + intermediateNulls[i] = true; + orSelected[orSel++] = i; + } else { + outputVector[i] = 1; + } + } + } + orRepeatingHasNulls = false; + } else { + + // Previous rounds were all false. + if (batch.selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + if (nextColVector.isNull[i]) { + intermediateNulls[i] = true; + orSelected[orSel++] = i; + } else if (nextVector[i] == 0) { + orSelected[orSel++] = i; + } else { + outputVector[i] = 1; + } + } + } else { + for (int i = 0; i != n; i++) { + if (nextColVector.isNull[i]) { + intermediateNulls[i] = true; + orSelected[orSel++] = i; + } else if (nextVector[i] == 0) { + orSelected[orSel++] = i; + } else { + outputVector[i] = 1; + } + } + } + } + } + } else { + + // The outputVector array contains the rows that are currently true and the rows + // are false and/or have null rows are represented in the intermediateNulls and + // orSelected arrays. + + if (nextColVector.isRepeating) { + if (nextColVector.noNulls) { + if (nextVector[0] == 1) { + outV.isRepeating = true; + outputVector[0] = 1; + return; + } + } else if (nextColVector.isNull[0]) { + + // Repeating null has influence over current orSelected entries. + for (int j = 0; j < orSel; j++) { + int i = orSelected[j]; + intermediateNulls[i] = true; + } + } + } else if (nextColVector.noNulls) { + + // Set true entries in output vector. + int newSel = 0; + for (int j = 0; j < orSel; j++) { + int i = orSelected[j]; + if (nextVector[i] == 0) { + orSelected[newSel++] = i; + } else { + outputVector[i] = 1; + } + } + orSel = newSel; + } else { + + // Can have nulls. + + // New round eliminates sets true rows and notes nulls. + int newSel = 0; + for (int j = 0; j < orSel; j++) { + int i = orSelected[j]; + if (nextColVector.isNull[i]) { + // Keep the entry with true(s) and now has at least one null. + intermediateNulls[i] = true; + orSelected[newSel++] = i; + } else if (nextVector[i] == 0) { + // Continue on whether orSelected entry is false. It may have nulls. + orSelected[newSel++] = i; + } else { + outputVector[i] = 1; + } + } + orSel = newSel; + } + } + } while (++colNum < colNums.length); + + // Produce final result. + + if (orRepeating) { + + // The orRepeating flags means the whole batch is false and may have nulls. 
+ outV.isRepeating = true; + if (orRepeatingHasNulls) { + outV.noNulls = false; + outV.isNull[0] = true; + } else { + outputVector[0] = 0; + } + } else { + + // When there is a false entry and there were intermediate nulls, the result row will be null. + // Note the true entries were already set in outputVector as we processed the rounds above. + for (int j = 0; j < orSel; j++) { + int i = orSelected[j]; + Preconditions.checkState(outputVector[i] == 0); + if (intermediateNulls[i]) { + outV.noNulls = false; + outV.isNull[i] = true; + } + } + } + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "boolean"; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("long"), + VectorExpressionDescriptor.ArgumentType.getType("long")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/test/queries/clientpositive/vector_multi_and.q ql/src/test/queries/clientpositive/vector_multi_and.q new file mode 100644 index 0000000..32486e0 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_multi_and.q @@ -0,0 +1,47 @@ +set hive.cli.print.header=true; +set hive.explain.user=false; +SET hive.auto.convert.join=true; +set hive.fetch.task.conversion=none; +set hive.mapred.mode=nonstrict; + +-- SORT_QUERY_RESULTS + +create table vectortab2k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/vectortab2k' OVERWRITE INTO TABLE vectortab2k; + +CREATE TABLE scratch AS SELECT t, si, i, b FROM vectortab2k; +INSERT INTO TABLE scratch VALUES (NULL, NULL, NULL, NULL); + +CREATE TABLE vectortab2k_orc STORED AS ORC AS SELECT * FROM scratch; + +SET hive.vectorized.execution.enabled=true; + +EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc order by t, si, i) as q; + +SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc order by t, si, i) as q; + +EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, i, b, (t < 0 AND si > 0 AND i < 0 AND b > 0) as multi_and_col from vectortab2k_orc order by t, si, i, b) as q; + +SELECT sum(hash(*)) FROM + (SELECT t, si, i, b, (t < 0 AND si > 0 AND i < 0 AND b > 0) as multi_and_col from vectortab2k_orc order by t, si, i, b) as q; diff --git ql/src/test/queries/clientpositive/vector_multi_or.q ql/src/test/queries/clientpositive/vector_multi_or.q new file mode 100644 index 0000000..813b343 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_multi_or.q @@ -0,0 +1,47 @@ +set hive.cli.print.header=true; +set hive.explain.user=false; +SET hive.auto.convert.join=true; +set hive.fetch.task.conversion=none; +set hive.mapred.mode=nonstrict; + +-- SORT_QUERY_RESULTS + +create table vectortab2k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + 
s2 string, + ts timestamp, + ts2 timestamp, + dt date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/vectortab2k' OVERWRITE INTO TABLE vectortab2k; + +CREATE TABLE scratch AS SELECT t, si, i, b FROM vectortab2k; +INSERT INTO TABLE scratch VALUES (NULL, NULL, NULL, NULL); + +CREATE TABLE vectortab2k_orc STORED AS ORC AS SELECT * FROM scratch; + +SET hive.vectorized.execution.enabled=true; + +EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 OR si > 0 OR i < 0) as multi_or_col from vectortab2k_orc order by t, si, i) as q; + +SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 OR si > 0 OR i < 0) as multi_or_col from vectortab2k_orc order by t, si, i) as q; + +EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, i, b, (t < 0 OR si > 0 OR i < 0 OR b > 0) as multi_or_col from vectortab2k_orc order by t, si, i, b) as q; + +SELECT sum(hash(*)) FROM + (SELECT t, si, i, b, (t < 0 OR si > 0 OR i < 0 OR b > 0) as multi_or_col from vectortab2k_orc order by t, si, i, b) as q; diff --git ql/src/test/results/clientpositive/vector_multi_and.q.out ql/src/test/results/clientpositive/vector_multi_and.q.out new file mode 100644 index 0000000..db32f17 --- /dev/null +++ ql/src/test/results/clientpositive/vector_multi_and.q.out @@ -0,0 +1,225 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +create table vectortab2k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@vectortab2k +POSTHOOK: query: -- SORT_QUERY_RESULTS + +create table vectortab2k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vectortab2k +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab2k' OVERWRITE INTO TABLE vectortab2k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@vectortab2k +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab2k' OVERWRITE INTO TABLE vectortab2k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@vectortab2k +PREHOOK: query: CREATE TABLE scratch AS SELECT t, si, i, b FROM vectortab2k +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@vectortab2k +PREHOOK: Output: database:default +PREHOOK: Output: default@scratch +POSTHOOK: query: CREATE TABLE scratch AS SELECT t, si, i, b FROM vectortab2k +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@vectortab2k +POSTHOOK: Output: database:default +POSTHOOK: Output: default@scratch +POSTHOOK: Lineage: scratch.b SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: scratch.i SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: scratch.si SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: scratch.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] +t si i b +PREHOOK: query: INSERT INTO TABLE scratch VALUES (NULL, NULL, NULL, NULL) +PREHOOK: type: 
QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@scratch +POSTHOOK: query: INSERT INTO TABLE scratch VALUES (NULL, NULL, NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@scratch +POSTHOOK: Lineage: scratch.b EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: scratch.i EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: scratch.si EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: scratch.t EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: CREATE TABLE vectortab2k_orc STORED AS ORC AS SELECT * FROM scratch +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@scratch +PREHOOK: Output: database:default +PREHOOK: Output: default@vectortab2k_orc +POSTHOOK: query: CREATE TABLE vectortab2k_orc STORED AS ORC AS SELECT * FROM scratch +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@scratch +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vectortab2k_orc +POSTHOOK: Lineage: vectortab2k_orc.b SIMPLE [(scratch)scratch.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc.i SIMPLE [(scratch)scratch.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc.si SIMPLE [(scratch)scratch.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc.t SIMPLE [(scratch)scratch.FieldSchema(name:t, type:tinyint, comment:null), ] +scratch.t scratch.si scratch.i scratch.b +PREHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc order by t, si, i) as q +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc order by t, si, i) as q +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vectortab2k_orc + Statistics: Num rows: 2001 Data size: 38108 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t (type: tinyint), si (type: smallint), i (type: int), ((t < 0) and (si > 0) and (i < 0)) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2001 Data size: 38108 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int) + sort order: +++ + Statistics: Num rows: 2001 Data size: 38108 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: boolean) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,VALUE._col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2001 Data size: 38108 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num 
rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc order by t, si, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 AND si > 0 AND i < 0) as multi_and_col from vectortab2k_orc order by t, si, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +c0 +22128070258 +PREHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, i, b, (t < 0 AND si > 0 AND i < 0 AND b > 0) as multi_and_col from vectortab2k_orc order by t, si, i, b) as q +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, i, b, (t < 0 AND si > 0 AND i < 0 AND b > 0) as multi_and_col from vectortab2k_orc order by t, si, i, b) as q +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vectortab2k_orc + Statistics: Num rows: 2001 Data size: 38108 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), ((t < 0) and (si > 0) and (i < 0) and (b > 0)) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2001 Data size: 38108 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint) + sort order: ++++ + Statistics: Num rows: 2001 Data size: 38108 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: boolean) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,KEY.reducesinkkey3,VALUE._col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2001 Data size: 38108 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, i, b, (t < 0 AND si > 0 AND i < 0 AND b > 0) as multi_and_col from vectortab2k_orc order by t, si, i, b) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, i, b, (t < 0 AND si > 0 AND i < 0 AND b > 0) as multi_and_col from vectortab2k_orc order by t, si, i, b) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc +#### A 
masked pattern was here #### +c0 +-1983102543 diff --git ql/src/test/results/clientpositive/vector_multi_or.q.out ql/src/test/results/clientpositive/vector_multi_or.q.out new file mode 100644 index 0000000..d7b3ae6 --- /dev/null +++ ql/src/test/results/clientpositive/vector_multi_or.q.out @@ -0,0 +1,225 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +create table vectortab2k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@vectortab2k +POSTHOOK: query: -- SORT_QUERY_RESULTS + +create table vectortab2k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vectortab2k +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab2k' OVERWRITE INTO TABLE vectortab2k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@vectortab2k +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab2k' OVERWRITE INTO TABLE vectortab2k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@vectortab2k +PREHOOK: query: CREATE TABLE scratch AS SELECT t, si, i, b FROM vectortab2k +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@vectortab2k +PREHOOK: Output: database:default +PREHOOK: Output: default@scratch +POSTHOOK: query: CREATE TABLE scratch AS SELECT t, si, i, b FROM vectortab2k +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@vectortab2k +POSTHOOK: Output: database:default +POSTHOOK: Output: default@scratch +POSTHOOK: Lineage: scratch.b SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: scratch.i SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: scratch.si SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: scratch.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] +t si i b +PREHOOK: query: INSERT INTO TABLE scratch VALUES (NULL, NULL, NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@scratch +POSTHOOK: query: INSERT INTO TABLE scratch VALUES (NULL, NULL, NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@scratch +POSTHOOK: Lineage: scratch.b EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: scratch.i EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: scratch.si EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: scratch.t EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: CREATE TABLE vectortab2k_orc STORED AS ORC AS SELECT * FROM scratch +PREHOOK: type: CREATETABLE_AS_SELECT 
+PREHOOK: Input: default@scratch +PREHOOK: Output: database:default +PREHOOK: Output: default@vectortab2k_orc +POSTHOOK: query: CREATE TABLE vectortab2k_orc STORED AS ORC AS SELECT * FROM scratch +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@scratch +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vectortab2k_orc +POSTHOOK: Lineage: vectortab2k_orc.b SIMPLE [(scratch)scratch.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc.i SIMPLE [(scratch)scratch.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc.si SIMPLE [(scratch)scratch.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: vectortab2k_orc.t SIMPLE [(scratch)scratch.FieldSchema(name:t, type:tinyint, comment:null), ] +scratch.t scratch.si scratch.i scratch.b +PREHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 OR si > 0 OR i < 0) as multi_or_col from vectortab2k_orc order by t, si, i) as q +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 OR si > 0 OR i < 0) as multi_or_col from vectortab2k_orc order by t, si, i) as q +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vectortab2k_orc + Statistics: Num rows: 2001 Data size: 38108 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t (type: tinyint), si (type: smallint), i (type: int), ((t < 0) or (si > 0) or (i < 0)) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2001 Data size: 38108 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int) + sort order: +++ + Statistics: Num rows: 2001 Data size: 38108 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: boolean) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,VALUE._col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2001 Data size: 38108 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 OR si > 0 OR i < 0) as multi_or_col from vectortab2k_orc order by t, si, i) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0 OR si > 0 OR i < 0) as multi_or_col from vectortab2k_orc order by t, si, i) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +c0 +22128071730 +PREHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, i, b, (t < 0 OR si > 0 OR i < 0 OR b > 0) as multi_or_col from 
vectortab2k_orc order by t, si, i, b) as q +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, i, b, (t < 0 OR si > 0 OR i < 0 OR b > 0) as multi_or_col from vectortab2k_orc order by t, si, i, b) as q +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vectortab2k_orc + Statistics: Num rows: 2001 Data size: 38108 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), ((t < 0) or (si > 0) or (i < 0) or (b > 0)) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2001 Data size: 38108 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint) + sort order: ++++ + Statistics: Num rows: 2001 Data size: 38108 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: boolean) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,KEY.reducesinkkey3,VALUE._col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2001 Data size: 38108 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, i, b, (t < 0 OR si > 0 OR i < 0 OR b > 0) as multi_or_col from vectortab2k_orc order by t, si, i, b) as q +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(*)) FROM + (SELECT t, si, i, b, (t < 0 OR si > 0 OR i < 0 OR b > 0) as multi_or_col from vectortab2k_orc order by t, si, i, b) as q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k_orc +#### A masked pattern was here #### +c0 +-1983100769