diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index c3940cbfe3..7dc4c812c7 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -94,7 +94,6 @@ import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor;
 import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.AggregationDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
@@ -102,7 +101,6 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
-import org.apache.hadoop.hive.ql.plan.GroupByDesc;
 import org.apache.hadoop.hive.ql.udf.*;
 import org.apache.hadoop.hive.ql.udf.generic.*;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode;
@@ -126,8 +124,6 @@ import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
-import org.apache.hadoop.util.StringUtils;
-import org.apache.hive.common.util.DateUtils;
 
 import com.google.common.annotations.VisibleForTesting;
 
@@ -2322,13 +2318,10 @@ private VectorExpression getBetweenFilterExpression(List<ExprNodeDesc> childExpr
     return createVectorExpression(cl, childrenAfterNot, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
   }
 
-  private boolean isColumnOrNonNullConst(ExprNodeDesc exprNodeDesc) {
-    if (exprNodeDesc instanceof ExprNodeColumnDesc) {
-      return true;
-    }
+  private boolean isNullConst(ExprNodeDesc exprNodeDesc) {
     if (exprNodeDesc instanceof ExprNodeConstantDesc) {
       String typeString = exprNodeDesc.getTypeString();
-      if (!typeString.equalsIgnoreCase("void")) {
+      if (typeString.equalsIgnoreCase("void")) {
         return true;
       }
     }
@@ -2341,33 +2334,47 @@ private VectorExpression getWhenExpression(List<ExprNodeDesc> childExpr,
     if (mode != VectorExpressionDescriptor.Mode.PROJECTION) {
       return null;
     }
-    if (childExpr.size() != 3) {
-      // For now, we only optimize the 2 value case.
-      return null;
-    }
+    final int size = childExpr.size();
 
-    /*
-     * When we have 2 simple values:
-     *    CASE WHEN boolExpr THEN column | const ELSE column | const END
-     * then we can convert to:   IF (boolExpr THEN column | const ELSE column | const)
-     */
-    // CONSIDER: Adding a version of IfExpr* than can handle a non-column/const expression in the
-    // THEN or ELSE.
-    ExprNodeDesc exprNodeDesc1 = childExpr.get(1);
-    ExprNodeDesc exprNodeDesc2 = childExpr.get(2);
-    if (isColumnOrNonNullConst(exprNodeDesc1) &&
-        isColumnOrNonNullConst(exprNodeDesc2)) {
-      // Yes.
-      GenericUDFIf genericUDFIf = new GenericUDFIf();
-      return
-          getVectorExpressionForUdf(
-            genericUDFIf,
-            GenericUDFIf.class,
-            childExpr,
-            mode,
-            returnType);
-    }
-    return null;   // Not handled by vector classes yet.
+    final ExprNodeDesc whenDesc = childExpr.get(0);
+    final ExprNodeDesc thenDesc = childExpr.get(1);
+    final ExprNodeDesc elseDesc;
+
+    if (size == 2) {
+      elseDesc = new ExprNodeConstantDesc(returnType, null);
+    } else if (size == 3) {
+      elseDesc = childExpr.get(2);
+    } else {
+      final GenericUDFWhen udfWhen = new GenericUDFWhen();
+      elseDesc = new ExprNodeGenericFuncDesc(returnType, udfWhen, udfWhen.getUdfName(),
+          childExpr.subList(2, childExpr.size()));
+    }
+
+    if (isNullConst(thenDesc)) {
+      final VectorExpression whenExpr = getVectorExpression(whenDesc, mode);
+      final VectorExpression elseExpr = getVectorExpression(elseDesc, mode);
+      final VectorExpression resultExpr = new IfExprNullColumn(
+          whenExpr.getOutputColumn(), elseExpr.getOutputColumn(),
+          ocm.allocateOutputColumn(returnType));
+      resultExpr.setChildExpressions(new VectorExpression[] {whenExpr, elseExpr});
+      resultExpr.setOutputType(returnType.getTypeName());
+      return resultExpr;
+    }
+    if (isNullConst(elseDesc)) {
+      final VectorExpression whenExpr = getVectorExpression(whenDesc, mode);
+      final VectorExpression thenExpr = getVectorExpression(thenDesc, mode);
+      final VectorExpression resultExpr = new IfExprColumnNull(
+          whenExpr.getOutputColumn(), thenExpr.getOutputColumn(),
+          ocm.allocateOutputColumn(returnType));
+      resultExpr.setChildExpressions(new VectorExpression[] {whenExpr, thenExpr});
+      resultExpr.setOutputType(returnType.getTypeName());
+      return resultExpr;
+    }
+    final GenericUDFIf genericUDFIf = new GenericUDFIf();
+    final List<ExprNodeDesc> ifChildExpr = Arrays.asList(whenDesc, thenDesc, elseDesc);
+    final ExprNodeGenericFuncDesc exprNodeDesc =
+        new ExprNodeGenericFuncDesc(returnType, genericUDFIf, "if", ifChildExpr);
+    return getVectorExpression(exprNodeDesc, mode);
   }
 
   /*
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnNull.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnNull.java
new file mode 100644
index 0000000000..c42216da53
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnNull.java
@@ -0,0 +1,92 @@
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+public class IfExprColumnNull extends VectorExpression {
+
+  private static final long serialVersionUID = 1L;
+
+  private final int arg1Column;
+  private final int arg2Column;
+  private final int outputColumn;
+
+  public IfExprColumnNull(int arg1Column, int arg2Column, int outputColumn) {
+    this.arg1Column = arg1Column;
+    this.arg2Column = arg2Column;
+    this.outputColumn = outputColumn;
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    final LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column];
+    final ColumnVector arg2ColVector = batch.cols[arg2Column];
+    final ColumnVector outputColVector = batch.cols[outputColumn];
+
+    final int[] sel = batch.selected;
+    final int n = batch.size;
+    final boolean[] null1 = arg1ColVector.isNull;
+    final long[] vector1 = arg1ColVector.vector;
+    final boolean[] isNull = outputColVector.isNull;
+
+    if (n == 0) {
+      return;
+    }
+
+    arg2ColVector.flatten(batch.selectedInUse, sel, n);
+
+    if (arg1ColVector.isRepeating) {
+      if (!null1[0] && vector1[0] == 1) {
+        outputColVector.setElement(0, 0, arg2ColVector);
+      } else {
+        outputColVector.noNulls = false;
+        isNull[0] = true;
+      }
+      return;
+    }
+    if (batch.selectedInUse) {
+      for (int j = 0; j < n; j++) {
+        int i = sel[j];
+        if (!null1[i] && vector1[i] == 1) {
+          outputColVector.setElement(i, i, arg2ColVector);
+        } else {
+          outputColVector.noNulls = false;
+          isNull[i] = true;
+        }
+      }
+    } else {
+      for (int i = 0; i < n; i++) {
+        if (!null1[i] && vector1[i] == 1) {
+          outputColVector.setElement(i, i, arg2ColVector);
+        } else {
+          outputColVector.noNulls = false;
+          isNull[i] = true;
+        }
+      }
+    }
+
+    arg2ColVector.unFlatten();
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return outputColumn;
+  }
+
+  @Override
+  public String vectorExpressionParameters() {
+    return "col " + arg1Column + ", col " + arg2Column + ", null";
+  }
+
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    throw new UnsupportedOperationException("Undefined descriptor");
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullColumn.java
new file mode 100644
index 0000000000..aec07a9e48
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullColumn.java
@@ -0,0 +1,92 @@
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+public class IfExprNullColumn extends VectorExpression {
+
+  private static final long serialVersionUID = 1L;
+
+  private final int arg1Column;
+  private final int arg2Column;
+  private final int outputColumn;
+
+  public IfExprNullColumn(int arg1Column, int arg2Column, int outputColumn) {
+    this.arg1Column = arg1Column;
+    this.arg2Column = arg2Column;
+    this.outputColumn = outputColumn;
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    final LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column];
+    final ColumnVector arg2ColVector = batch.cols[arg2Column];
+    final ColumnVector outputColVector = batch.cols[outputColumn];
+
+    final int[] sel = batch.selected;
+    final int n = batch.size;
+    final boolean[] null1 = arg1ColVector.isNull;
+    final long[] vector1 = arg1ColVector.vector;
+    final boolean[] isNull = outputColVector.isNull;
+
+    if (n == 0) {
+      return;
+    }
+
+    arg2ColVector.flatten(batch.selectedInUse, sel, n);
+
+    if (arg1ColVector.isRepeating) {
+      if (!null1[0] && vector1[0] == 1) {
+        outputColVector.noNulls = false;
+        isNull[0] = true;
+      } else {
+        outputColVector.setElement(0, 0, arg2ColVector);
+      }
+      return;
+    }
+    if (batch.selectedInUse) {
+      for (int j = 0; j < n; j++) {
+        int i = sel[j];
+        if (!null1[i] && vector1[i] == 1) {
+          outputColVector.noNulls = false;
+          isNull[i] = true;
+        } else {
+          outputColVector.setElement(i, i, arg2ColVector);
+        }
+      }
+    } else {
+      for (int i = 0; i < n; i++) {
+        if (!null1[i] && vector1[i] == 1) {
+          outputColVector.noNulls = false;
+          isNull[i] = true;
+        } else {
+          outputColVector.setElement(i, i, arg2ColVector);
+        }
+      }
+    }
+
+    arg2ColVector.unFlatten();
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return outputColumn;
+  }
+
+  @Override
+  public String vectorExpressionParameters() {
+    return "col " + arg1Column + ", null, col " + arg2Column;
arg1Column + ", null, col "+ arg2Column; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + throw new UnsupportedOperationException("Undefined descriptor"); + } +} diff --git ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out index 06dde804dc..28edb6f5b3 100644 --- ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out +++ ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out @@ -50,13 +50,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumns: [0, 3] - selectExpressions: VectorUDFAdaptor(CASE WHEN (bool) THEN (1) WHEN ((not bool)) THEN (0) ELSE (null) END)(children: NotCol(col 1) -> 2:boolean) -> 3:int + projectedOutputColumns: [0, 5] + selectExpressions: IfExprLongScalarLongColumn(col 1, val 1, col 4)(children: IfExprColumnNull(col 2, col 3, null)(children: NotCol(col 1) -> 2:boolean, ConstantVectorExpression(val 0) -> 3:long) -> 4:int) -> 5:long Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col1) Group By Vectorization: - aggregators: VectorUDAFCount(col 3) -> bigint + aggregators: VectorUDAFCount(col 5) -> bigint className: VectorGroupByOperator vectorOutput: true keyExpressions: col 0 @@ -84,7 +84,7 @@ STAGE PLANS: groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false - usesVectorUDFAdaptor: true + usesVectorUDFAdaptor: false vectorized: true Reducer 2 Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/vector_when_case_null.q.out ql/src/test/results/clientpositive/vector_when_case_null.q.out index a7ab4ef43e..5ae4b99b55 100644 --- ql/src/test/results/clientpositive/vector_when_case_null.q.out +++ ql/src/test/results/clientpositive/vector_when_case_null.q.out @@ -44,13 +44,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumns: [0, 3] - selectExpressions: VectorUDFAdaptor(CASE WHEN (bool) THEN (1) WHEN ((not bool)) THEN (0) ELSE (null) END)(children: NotCol(col 1) -> 2:boolean) -> 3:int + projectedOutputColumns: [0, 5] + selectExpressions: IfExprLongScalarLongColumn(col 1, val 1, col 4)(children: IfExprColumnNull(col 2, col 3, null)(children: NotCol(col 1) -> 2:boolean, ConstantVectorExpression(val 0) -> 3:long) -> 4:int) -> 5:long Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col1) Group By Vectorization: - aggregators: VectorUDAFCount(col 3) -> bigint + aggregators: VectorUDAFCount(col 5) -> bigint className: VectorGroupByOperator vectorOutput: true keyExpressions: col 0 @@ -78,7 +78,7 @@ STAGE PLANS: groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false - usesVectorUDFAdaptor: true + usesVectorUDFAdaptor: false vectorized: true Reduce Vectorization: enabled: false