diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConditionalFilterVectorExpression.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConditionalFilterVectorExpression.java
new file mode 100644
index 0000000..9a48027
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConditionalFilterVectorExpression.java
@@ -0,0 +1,258 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import java.util.Arrays;
+
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+/**
+ * Shared helper for vectorized {@code IF(expr1, expr2, expr3)} expressions that evaluates
+ * each branch only on the rows that need it.
+ *
+ * <p>{@code expr1} must produce a boolean {@link LongColumnVector}. Rows where it is 1 (and
+ * not null) are selected while {@code expr2} runs; rows where every condition recorded for
+ * the current batch was false are selected while {@code expr3} runs. The batch's original
+ * {@code selected}, {@code size} and {@code selectedInUse} are put back afterwards.
+ *
+ * <p>All bookkeeping lives in static fields guarded by the class monitor, so evaluation is
+ * serialized across every thread in the JVM.
+ * NOTE(review): per-expression or per-batch state would remove that bottleneck - confirm
+ * whether the static design is intentional.
+ * NOTE(review): conditions of an IF nested inside a THEN branch are recorded in the same
+ * table as the enclosing IF, so the nested ELSE also requires the outer condition to be
+ * false - verify this is the intended semantics.
+ */
+public abstract class ConditionalFilterVectorExpression extends VectorExpression {
+
+  /**
+   * One row per condition evaluated for the current batch; entry {@code [c][row]} is 1 when
+   * condition {@code c} was false (or null) for batch row {@code row}, 0 otherwise.
+   */
+  public static int[][] false_flag;
+  /** Scratch selection vector holding the rows on which the ELSE expression runs. */
+  public static int[] false_sel;
+  /** Number of conditions recorded in {@link #false_flag} for the current batch. */
+  public static int con_count;
+  /** Nesting depth of {@link #evaluateIfChildren}; 0 means no batch is in progress. */
+  public static int flag = 0;
+  /** Copy of the contents of the batch's selection vector taken at depth 1. */
+  public static int[] pre_sel = new int[VectorizedRowBatch.DEFAULT_SIZE];
+  /** {@code batch.size} as it was at depth 1. */
+  public static int size;
+  /** {@code batch.selectedInUse} as it was at depth 1. */
+  public static boolean selectInUse;
+
+  /** Initial number of condition slots in {@link #false_flag}; the table grows on demand. */
+  private static final int INITIAL_CONDITION_SLOTS = 16;
+  /** The batch's own selection array, so the restore never hands out a shared static array. */
+  private static int[] batchSelected;
+
+  /**
+   * Evaluates the children of an IF expression, narrowing the batch before each branch.
+   *
+   * @param batch the batch being processed; its selection state is restored on return,
+   *     including when a child expression throws
+   * @param childExpressions the children of the IF (condition first); when
+   *     {@code null} nothing is evaluated and no state is touched
+   * @param indexes the IF's argument columns; only {@code indexes[1]} is consulted, to tell
+   *     {@code IF(c, x, null)} from {@code IF(c, null, y)} when there are two children
+   */
+  public synchronized static void evaluateIfChildren(VectorizedRowBatch batch,
+      VectorExpression[] childExpressions, int[] indexes) {
+    if (childExpressions == null) {
+      return;
+    }
+    flag++;
+    try {
+      if (flag == 1) {
+        saveBatchState(batch);
+      }
+      int length = childExpressions.length;
+      if (length == 1) {
+        // IF(expr1, null, null): only the condition has to be computed.
+        evaluateConditionalExpression(batch, childExpressions[0]);
+      } else if (length == 2) {
+        // Either IF(expr1, expr2, null) or IF(expr1, null, expr3).
+        evaluateConditionalExpression(batch, childExpressions[0]);
+        if (childExpressions[1].getOutputColumn() == indexes[1]) {
+          // IF(expr1, expr2, null): run expr2 on the rows the condition kept.
+          childExpressions[1].evaluate(batch);
+        } else if (childExpressions[1].getChildExpressions() != null) {
+          // IF(expr1, null, expr3) where expr3 has children of its own (for example a
+          // further WHEN branch of a CASE): restart it on the full batch.
+          evaluateChildExpressions(batch, childExpressions[1]);
+        } else {
+          // IF(expr1, null, expr3): run expr3 where the conditions were false.
+          evaluateFalseExpression(batch, childExpressions[1]);
+        }
+      } else if (length == 3) {
+        // IF(expr1, expr2, expr3): expr2 runs on the rows the condition kept.
+        evaluateConditionalExpression(batch, childExpressions[0]);
+        childExpressions[1].evaluate(batch);
+        if (childExpressions[2].childExpressions != null) {
+          evaluateChildExpressions(batch, childExpressions[2]);
+        } else {
+          evaluateFalseExpression(batch, childExpressions[2]);
+        }
+      }
+    } finally {
+      // Runs on failure too, so the depth counter and the batch never stay corrupted.
+      restoreBatchState(batch);
+      flag--;
+      if (flag == 0) {
+        // Do not keep the batch's array reachable between batches.
+        batchSelected = null;
+      }
+    }
+  }
+
+  /**
+   * Restores the full batch and evaluates {@code ve} on it.
+   *
+   * @param batch the batch being processed
+   * @param ve the expression to evaluate
+   */
+  public synchronized static void evaluateChildExpressions(VectorizedRowBatch batch,
+      VectorExpression ve) {
+    restoreBatchState(batch);
+    ve.evaluate(batch);
+  }
+
+  /**
+   * Evaluates the condition {@code ve} on the full batch, records the rows where it did not
+   * hold, and narrows the batch to the rows where it did.
+   *
+   * @param batch the batch being processed
+   * @param ve the condition; its output column must be a {@link LongColumnVector}
+   */
+  public synchronized static void evaluateConditionalExpression(VectorizedRowBatch batch,
+      VectorExpression ve) {
+    restoreBatchState(batch);
+    int colNum = ve.getOutputColumn();
+    ve.evaluate(batch);
+    LongColumnVector outputColVector = (LongColumnVector) batch.cols[colNum];
+    long[] condition = outputColVector.vector;
+    boolean[] isNull = outputColVector.isNull;
+    boolean noNulls = outputColVector.noNulls;
+    boolean repeating = outputColVector.isRepeating;
+    int[] sel = batch.selected;
+    boolean useSel = batch.selectedInUse;
+    int n = batch.size;
+    int[] falseRow = nextFalseRow(Math.max(pre_sel.length, condition.length));
+    int newSize = 0;
+    for (int j = 0; j < n; j++) {
+      // Map the position in the batch to the physical row it refers to.
+      int row = useSel ? sel[j] : j;
+      // A repeating vector carries its single value (and null flag) in slot 0.
+      int src = repeating ? 0 : row;
+      if (condition[src] == 1 && (noNulls || !isNull[src])) {
+        // Writing in place is safe: newSize never runs ahead of j.
+        sel[newSize++] = row;
+      } else {
+        falseRow[row] = 1;
+      }
+    }
+    con_count++;
+    if (newSize < n) {
+      batch.size = newSize;
+      batch.selectedInUse = true;
+    }
+  }
+
+  /**
+   * Evaluates {@code ve} on the rows for which every recorded condition was false.
+   *
+   * @param batch the batch being processed; left narrowed to those rows on return
+   * @param ve the ELSE expression
+   */
+  public synchronized static void evaluateFalseExpression(VectorizedRowBatch batch,
+      VectorExpression ve) {
+    int newSize = 0;
+    for (int j = 0; j < size; j++) {
+      int row = selectInUse ? pre_sel[j] : j;
+      if (isFalseForAllConditions(row)) {
+        false_sel[newSize++] = row;
+      }
+    }
+    // Always set the size: the batch may still be narrowed to the THEN rows (possibly none).
+    batch.size = newSize;
+    batch.selectedInUse = selectInUse || newSize < size;
+    batch.selected = false_sel;
+    ve.evaluate(batch);
+  }
+
+  /** Returns true when no recorded condition was true for {@code row}. */
+  private static boolean isFalseForAllConditions(int row) {
+    for (int c = 0; c < con_count; c++) {
+      if (false_flag[c][row] == 0) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  /** Captures the batch's selection state so it can be put back after each branch. */
+  private static void saveBatchState(VectorizedRowBatch batch) {
+    int[] selected = batch.selected;
+    // Guard against a selection vector longer than DEFAULT_SIZE overrunning the scratch arrays.
+    int capacity = Math.max(VectorizedRowBatch.DEFAULT_SIZE, selected.length);
+    if (pre_sel == null || pre_sel.length < capacity) {
+      pre_sel = new int[capacity];
+    }
+    if (false_sel == null || false_sel.length < capacity) {
+      false_sel = new int[capacity];
+    }
+    System.arraycopy(selected, 0, pre_sel, 0, selected.length);
+    con_count = 0;
+    size = batch.size;
+    selectInUse = batch.selectedInUse;
+    // Assigned last so a failure above leaves nothing to restore.
+    batchSelected = selected;
+  }
+
+  /** Puts the saved selection state back into the batch; no-op when nothing was saved. */
+  private static void restoreBatchState(VectorizedRowBatch batch) {
+    if (batchSelected == null) {
+      return;
+    }
+    // Copy into the batch's own array rather than exposing the shared static copy.
+    System.arraycopy(pre_sel, 0, batchSelected, 0, batchSelected.length);
+    batch.selected = batchSelected;
+    batch.size = size;
+    batch.selectedInUse = selectInUse;
+  }
+
+  /** Returns a zeroed row of {@link #false_flag} for the next condition, growing as needed. */
+  private static int[] nextFalseRow(int rowLength) {
+    if (false_flag == null) {
+      false_flag = new int[INITIAL_CONDITION_SLOTS][];
+    } else if (con_count >= false_flag.length) {
+      false_flag = Arrays.copyOf(false_flag, Math.max(INITIAL_CONDITION_SLOTS, con_count * 2));
+    }
+    int[] row = false_flag[con_count];
+    if (row == null || row.length < rowLength) {
+      row = new int[rowLength];
+      false_flag[con_count] = row;
+    } else {
+      Arrays.fill(row, 0);
+    }
+    return row;
+  }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnNull.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnNull.java
index 8cae274..73eb8f7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnNull.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnNull.java
@@ -40,9 +40,9 @@ public IfExprColumnNull(int arg1Column, int arg2Column, int outputColumn) {
   public void evaluate(VectorizedRowBatch batch) {
 
     if (childExpressions != null) {
-      super.evaluateChildren(batch);
+      int[] indexes = {this.arg1Column, this.arg2Column};
+      ConditionalFilterVectorExpression.evaluateIfChildren(batch, childExpressions, indexes);
     }
-
     final LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column];
     final ColumnVector arg2ColVector = batch.cols[arg2Column];
     final ColumnVector outputColVector = batch.cols[outputColumn];
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java
index 514b453..de68183 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java
@@ -48,7 +48,8 @@ public IfExprDoubleColumnDoubleColumn() {
   public void evaluate(VectorizedRowBatch batch) {
 
     if (childExpressions != null) {
-      super.evaluateChildren(batch);
+      int[] indexes = {this.arg1Column, this.arg2Column, this.arg3Column};
+      ConditionalFilterVectorExpression.evaluateIfChildren(batch, childExpressions, indexes);
     }
 
     LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column];
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java
index 98fa29e..7e1f05b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java
@@ -48,7 +48,8 @@ public IfExprIntervalDayTimeColumnColumn() {
   public void evaluate(VectorizedRowBatch batch) {
 
     if (childExpressions != null) {
-      super.evaluateChildren(batch);
+      int[] indexes = {this.arg1Column, this.arg2Column, this.arg3Column};
+      ConditionalFilterVectorExpression.evaluateIfChildren(batch, childExpressions, indexes);
     }
 
     LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column];
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java
index 4c6015e..2ac3a39 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java
@@ -47,7 +47,8 @@ public IfExprLongColumnLongColumn() {
   public void evaluate(VectorizedRowBatch batch) {
 
     if (childExpressions != null) {
-      super.evaluateChildren(batch);
+      int[] indexes = {this.arg1Column, this.arg2Column, this.arg3Column};
+      ConditionalFilterVectorExpression.evaluateIfChildren(batch, childExpressions, indexes);
     }
 
     LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column];
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullColumn.java
index 156fcc4..e55d470 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullColumn.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullColumn.java
@@ -40,7 +40,8 @@ public IfExprNullColumn(int arg1Column, int arg2Column, int outputColumn) {
   public void evaluate(VectorizedRowBatch batch) {
 
     if (childExpressions != null) {
-      super.evaluateChildren(batch);
+      int[] indexes = {this.arg1Column, this.arg2Column};
+      ConditionalFilterVectorExpression.evaluateIfChildren(batch, childExpressions, indexes);
     }
 
     final LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column];
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java
index c8367c6..2afba58 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java
@@ -51,7 +51,8 @@ public IfExprStringGroupColumnStringGroupColumn() {
   public void evaluate(VectorizedRowBatch batch) {
 
     if (childExpressions != null) {
-      super.evaluateChildren(batch);
+      int[] indexes = {this.arg1Column, this.arg2Column, this.arg3Column};
+      ConditionalFilterVectorExpression.evaluateIfChildren(batch, childExpressions, indexes);
     }
 
     LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column];
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java
index 8219b3c..2996837 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java
@@ -47,7 +47,8 @@ public IfExprTimestampColumnColumnBase() {
   public void evaluate(VectorizedRowBatch batch) {
 
     if (childExpressions != null) {
-      super.evaluateChildren(batch);
+      int[] indexes = {this.arg1Column, this.arg2Column, this.arg3Column};
+      ConditionalFilterVectorExpression.evaluateIfChildren(batch, childExpressions, indexes);
     }
 
     LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column];