diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnNull.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnNull.java index 8cae274..bd08c86 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnNull.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnNull.java @@ -29,20 +29,21 @@ private final int arg1Column; private final int arg2Column; private final int outputColumn; + private IfExprConditionalFilter cf; public IfExprColumnNull(int arg1Column, int arg2Column, int outputColumn) { this.arg1Column = arg1Column; this.arg2Column = arg2Column; this.outputColumn = outputColumn; + this.cf = new IfExprConditionalFilter(arg1Column, arg2Column); } @Override public void evaluate(VectorizedRowBatch batch) { if (childExpressions != null) { - super.evaluateChildren(batch); + cf.evaluateIfConditionalExpr(batch, childExpressions); } - final LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; final ColumnVector arg2ColVector = batch.cols[arg2Column]; final ColumnVector outputColVector = batch.cols[outputColumn]; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprConditionalFilter.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprConditionalFilter.java new file mode 100644 index 0000000..5c1c400 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprConditionalFilter.java @@ -0,0 +1,162 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.exec.vector.expressions; + + +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * For conditional expressions, the{@code IfExprConditionalFilter} class updated + * the selected array of batch parameter after the conditional expression is executed. + * Then the remaining expression will only do the selected rows instead of all. + */ +public class IfExprConditionalFilter { + public int arg1Column = -1; + public int arg2Column = -1; + + public IfExprConditionalFilter(int arg1Column, int arg2Column) { + this.arg1Column = arg1Column; + this.arg2Column = arg2Column; + } + + /** + * For If(expr1,expr2,expr3) expression, + * Firstly, save the previous selected vector, size and selectedInUse value of batch. + * Secondly evaluate the conditional expression and update the selected array of batch based + * on the result of conditional expression(1 denote done, 0 denote not done) + * Then evaluate the expr2 based on the updated selected. + * After the expr2 is executed, remove the indexes which have done in expr2. + * Last, evaluate the expr3 based on the updated selected. + * + * @param batch + */ + public void evaluateIfConditionalExpr(VectorizedRowBatch batch, VectorExpression[] childExpressions) { + if (childExpressions != null) { + // Save the previous selected vector, size and selectedInUse value of batch. + int[] prevSelected = new int[batch.selected.length]; + int[] prevSelectedFalse = new int[batch.selected.length]; + int prevSize = batch.size; + boolean prevSelectInUse = batch.selectedInUse; + if (!batch.selectedInUse) { + for (int i = 0; i < batch.size; i++) { + prevSelected[i] = i; + } + System.arraycopy(batch.selected, 0, prevSelectedFalse, 0, batch.selected.length); + System.arraycopy(prevSelected, 0, batch.selected, 0, batch.size); + } else { + System.arraycopy(batch.selected, 0, prevSelected, 0, batch.selected.length); + } + + // Evaluate the conditional expression. + evaluateConditionalExpression(batch, childExpressions[0], + prevSize, prevSelectInUse); + if (childExpressions!= null && childExpressions.length == 2) { + // If the length is 2, it has two situations:If(expr1,expr2,null) or + // If(expr1,null,expr3) distinguished by the indexes. + if (childExpressions[1].getOutputColumn() == arg2Column) { + // Evaluate the expr2 expression. + childExpressions[1].evaluate(batch); + } else { + // Update the selected array of batch to remove the index of being done. + evaluateSelectedArray(batch, arg1Column, prevSelected, prevSize); + // If(expr1,null,expr3), if the expr1 is false, expr3 will be evaluated. + childExpressions[1].evaluate(batch); + } + } else if (childExpressions!= null && childExpressions.length == 3) { + // IF(expr1,expr2,expr3). expr1,expr2,expr3 are all the expression. + // Evaluate the expr2 expression. + childExpressions[1].evaluate(batch); + // Update the selected array of batch to remove the index of being done. + evaluateSelectedArray(batch, arg1Column, prevSelected, prevSize); + // Evaluate the expr3 expression. + childExpressions[2].evaluate(batch); + } + // When evaluate all the expressions, restore the previous selected + // vector,size and selectedInUse value of batch. + batch.size = prevSize; + batch.selectedInUse = prevSelectInUse; + if(!prevSelectInUse){ + batch.selected = prevSelectedFalse; + } else{ + batch.selected = prevSelected; + } + } + } + + + /** + * Update the selected array of batch based on the conditional expression + * result, remove the index of being done. + * + * @param batch + * @param num the column num of conditional expression in batch cols + * @param prevSelected the previous selected array + */ + private static void evaluateSelectedArray(VectorizedRowBatch batch, int num, + int[] prevSelected, int prevSize) { + // Get the result of conditional expression. + LongColumnVector outputColVector = (LongColumnVector) batch.cols[num]; + long[] flag = outputColVector.vector; + int newSize = 0; + // Update the selected array of batch + for (int j = 0; j < prevSize; j++) { + if (flag[prevSelected[j]] == 0) { + batch.selected[newSize++] = prevSelected[j]; + } + } + batch.size = newSize; + batch.selectedInUse = true; + } + + /** + * Evaluate the conditional expression and update the selected array of batch + * based on the result of conditional expression. + * + * @param batch + * @param ve the conditional expression need to evaluate + * @param prevSize the previous batch size + * @param prevSelectInUse the previous selectInUse + */ + private static void evaluateConditionalExpression(VectorizedRowBatch batch, + VectorExpression ve, int prevSize, + boolean prevSelectInUse) { + batch.size = prevSize; + batch.selectedInUse = prevSelectInUse; + int colNum = ve.getOutputColumn(); + // Evaluate the conditional expression. + ve.evaluate(batch); + LongColumnVector outputColVector = (LongColumnVector) batch.cols[colNum]; + long[] flag = outputColVector.vector; + int[] sel = batch.selected; + int newSize = 0; + // Update the selected array of the batch based on the conditional expression. + for (int j = 0; j < batch.size; j++) { + int k = sel[j]; + if (flag[k] == 1) { + sel[newSize++] = k; + } + } + if(newSize < batch.size ) { + batch.size = newSize; + batch.selectedInUse = true; + } + } +} + diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java index 514b453..7110a63 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java @@ -33,12 +33,14 @@ private int arg1Column, arg2Column, arg3Column; private int outputColumn; + private IfExprConditionalFilter cf; public IfExprDoubleColumnDoubleColumn(int arg1Column, int arg2Column, int arg3Column, int outputColumn) { this.arg1Column = arg1Column; this.arg2Column = arg2Column; this.arg3Column = arg3Column; this.outputColumn = outputColumn; + this.cf = new IfExprConditionalFilter(arg1Column, arg2Column); } public IfExprDoubleColumnDoubleColumn() { @@ -48,7 +50,7 @@ public IfExprDoubleColumnDoubleColumn() { public void evaluate(VectorizedRowBatch batch) { if (childExpressions != null) { - super.evaluateChildren(batch); + cf.evaluateIfConditionalExpr(batch, childExpressions); } LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java index 98fa29e..33c23c6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java @@ -33,12 +33,14 @@ private int arg1Column, arg2Column, arg3Column; private int outputColumn; + private IfExprConditionalFilter cf; public IfExprIntervalDayTimeColumnColumn(int arg1Column, int arg2Column, int arg3Column, int outputColumn) { this.arg1Column = arg1Column; this.arg2Column = arg2Column; this.arg3Column = arg3Column; this.outputColumn = outputColumn; + this.cf = new IfExprConditionalFilter(arg1Column, arg2Column); } public IfExprIntervalDayTimeColumnColumn() { @@ -48,7 +50,7 @@ public IfExprIntervalDayTimeColumnColumn() { public void evaluate(VectorizedRowBatch batch) { if (childExpressions != null) { - super.evaluateChildren(batch); + cf.evaluateIfConditionalExpr(batch, childExpressions); } LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java index 4c6015e..11dd4b3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java @@ -32,12 +32,14 @@ private int arg1Column, arg2Column, arg3Column; private int outputColumn; + private IfExprConditionalFilter cf; public IfExprLongColumnLongColumn(int arg1Column, int arg2Column, int arg3Column, int outputColumn) { this.arg1Column = arg1Column; this.arg2Column = arg2Column; this.arg3Column = arg3Column; this.outputColumn = outputColumn; + this.cf = new IfExprConditionalFilter(arg1Column, arg2Column); } public IfExprLongColumnLongColumn() { @@ -47,7 +49,7 @@ public IfExprLongColumnLongColumn() { public void evaluate(VectorizedRowBatch batch) { if (childExpressions != null) { - super.evaluateChildren(batch); + cf.evaluateIfConditionalExpr(batch, childExpressions); } LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullColumn.java index 156fcc4..c3339c8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullColumn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullColumn.java @@ -29,18 +29,20 @@ private final int arg1Column; private final int arg2Column; private final int outputColumn; + private IfExprConditionalFilter cf; public IfExprNullColumn(int arg1Column, int arg2Column, int outputColumn) { this.arg1Column = arg1Column; this.arg2Column = arg2Column; this.outputColumn = outputColumn; + this.cf = new IfExprConditionalFilter(arg1Column, arg2Column); } @Override public void evaluate(VectorizedRowBatch batch) { if (childExpressions != null) { - super.evaluateChildren(batch); + cf.evaluateIfConditionalExpr(batch, childExpressions); } final LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java index c8367c6..2899e92 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java @@ -35,12 +35,14 @@ private int arg1Column, arg2Column, arg3Column; private int outputColumn; + private IfExprConditionalFilter cf; public IfExprStringGroupColumnStringGroupColumn(int arg1Column, int arg2Column, int arg3Column, int outputColumn) { this.arg1Column = arg1Column; this.arg2Column = arg2Column; this.arg3Column = arg3Column; this.outputColumn = outputColumn; + this.cf = new IfExprConditionalFilter(arg1Column, arg2Column); } public IfExprStringGroupColumnStringGroupColumn() { @@ -51,7 +53,7 @@ public IfExprStringGroupColumnStringGroupColumn() { public void evaluate(VectorizedRowBatch batch) { if (childExpressions != null) { - super.evaluateChildren(batch); + cf.evaluateIfConditionalExpr(batch, childExpressions); } LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java index 8219b3c..4bf5af4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java @@ -32,12 +32,13 @@ private int arg1Column, arg2Column, arg3Column; private int outputColumn; - + private IfExprConditionalFilter cf = null; public IfExprTimestampColumnColumnBase(int arg1Column, int arg2Column, int arg3Column, int outputColumn) { this.arg1Column = arg1Column; this.arg2Column = arg2Column; this.arg3Column = arg3Column; this.outputColumn = outputColumn; + this.cf = new IfExprConditionalFilter(arg1Column, arg2Column); } public IfExprTimestampColumnColumnBase() { @@ -45,9 +46,8 @@ public IfExprTimestampColumnColumnBase() { @Override public void evaluate(VectorizedRowBatch batch) { - if (childExpressions != null) { - super.evaluateChildren(batch); + cf.evaluateIfConditionalExpr(batch, childExpressions); } LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java index 14ba646..9ff280d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java @@ -31,9 +31,11 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprConditionalFilter; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIf; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.HiveCharWritable; import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; @@ -61,6 +63,7 @@ private String resultType; private VectorUDFArgDesc[] argDescs; private ExprNodeGenericFuncDesc expr; + private IfExprConditionalFilter cf; private transient GenericUDF genericUDF; private transient GenericUDF.DeferredObject[] deferredChildren; @@ -84,6 +87,7 @@ public VectorUDFAdaptor ( this.outputColumn = outputColumn; this.resultType = resultType; this.argDescs = argDescs; + this.cf = new IfExprConditionalFilter(argDescs[0].getColumnNum(), argDescs[1].getColumnNum()); } // Initialize transient fields. To be called after deserialization of other fields. @@ -125,7 +129,11 @@ public void evaluate(VectorizedRowBatch batch) { } if (childExpressions != null) { - super.evaluateChildren(batch); + if ((GenericUDFIf.class.getName()).equals(genericUDF.getUdfName())) { + cf.evaluateIfConditionalExpr(batch, childExpressions); + } else { + super.evaluateChildren(batch); + } } int[] sel = batch.selected;