diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index bf78251..f184b8d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -103,7 +103,6 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.plan.GroupByDesc; -import org.apache.hadoop.hive.ql.udf.SettableUDF; import org.apache.hadoop.hive.ql.udf.*; import org.apache.hadoop.hive.ql.udf.generic.*; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode; @@ -583,6 +582,29 @@ public VectorExpression getVectorExpression(ExprNodeDesc exprDesc, VectorExpress ve = getColumnVectorExpression((ExprNodeColumnDesc) exprDesc, mode); } else if (exprDesc instanceof ExprNodeGenericFuncDesc) { ExprNodeGenericFuncDesc expr = (ExprNodeGenericFuncDesc) exprDesc; + // push not through between... 
+ if ("not".equals(expr.getFuncText())) { // fold NOT(BETWEEN(flag, ...)) into BETWEEN(!flag, ...) in place
+   if (expr.getChildren() != null && expr.getChildren().size() == 1) {
+     ExprNodeDesc child = expr.getChildren().get(0);
+     if (child instanceof ExprNodeGenericFuncDesc) {
+       ExprNodeGenericFuncDesc childExpr = (ExprNodeGenericFuncDesc) child;
+       if ("between".equals(childExpr.getFuncText()) && childExpr.getChildren().get(0) instanceof ExprNodeConstantDesc) { // non-constant flag: leave the NOT untouched instead of failing the cast
+         ExprNodeConstantDesc flag = (ExprNodeConstantDesc) childExpr.getChildren().get(0); // leading boolean constant of BETWEEN, flipped below
+         List<ExprNodeDesc> newChildren = new ArrayList<>();
+         if (Boolean.TRUE.equals(flag.getValue())) {
+           newChildren.add(new ExprNodeConstantDesc(Boolean.FALSE));
+         } else {
+           newChildren.add(new ExprNodeConstantDesc(Boolean.TRUE)); // a null or FALSE flag becomes TRUE
+         }
+         newChildren
+             .addAll(childExpr.getChildren().subList(1, childExpr.getChildren().size())); // remaining BETWEEN operands are carried over unchanged
+         expr.setTypeInfo(childExpr.getTypeInfo()); // from here on expr itself is the BETWEEN call: it takes the child's type, UDF and (flipped) operands
+         expr.setGenericUDF(childExpr.getGenericUDF());
+         expr.setChildren(newChildren);
+       }
+     }
+   }
+ }
  // Add cast expression if needed. Child expressions of a udf may return different data types
  // and that would require converting their data types to evaluate the udf.
// For example decimal column added to an integer column would require integer column to be diff --git a/ql/src/test/results/clientpositive/llap/vector_between_in.q.out b/ql/src/test/results/clientpositive/llap/vector_between_in.q.out index 4bbb170..8a4383e 100644 --- a/ql/src/test/results/clientpositive/llap/vector_between_in.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_between_in.q.out @@ -553,8 +553,8 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsFalse(col 4)(children: VectorUDFAdaptor(cdate BETWEEN 1968-05-01 AND 1971-09-01) -> 4:boolean) -> boolean - predicate: (not cdate BETWEEN 1968-05-01 AND 1971-09-01) (type: boolean) + predicateExpression: FilterLongColumnNotBetween(col 3, left -610, right 608) -> boolean + predicate: cdate NOT BETWEEN 1968-05-01 AND 1971-09-01 (type: boolean) Statistics: Num rows: 10923 Data size: 2193503 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdate (type: date) @@ -581,7 +581,7 @@ STAGE PLANS: groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false - usesVectorUDFAdaptor: true + usesVectorUDFAdaptor: false vectorized: true Reducer 2 Execution mode: vectorized, llap @@ -747,8 +747,8 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsFalse(col 4)(children: VectorUDFAdaptor(cdecimal1 BETWEEN -2000 AND 4390.1351351351) -> 4:boolean) -> boolean - predicate: (not cdecimal1 BETWEEN -2000 AND 4390.1351351351) (type: boolean) + predicateExpression: FilterDecimalColumnNotBetween(col 1, left -2000, right 4390.1351351351) -> boolean + predicate: cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 (type: boolean) Statistics: Num rows: 10923 Data size: 2193503 Basic stats: COMPLETE Column stats: NONE Select Operator Select Vectorization: @@ -784,7 +784,7 @@ STAGE PLANS: groupByVectorOutput: true inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false - usesVectorUDFAdaptor: true + usesVectorUDFAdaptor: false vectorized: true Reducer 2 Execution mode: vectorized, llap @@ -1516,21 +1516,21 @@ STAGE PLANS: native: true projectedOutputColumns: [0, 1, 2, 3] Select Operator - expressions: (not cdecimal1 BETWEEN -2000 AND 4390.1351351351) (type: boolean) + expressions: cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 (type: boolean) outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumns: [5] - selectExpressions: NotCol(col 4)(children: VectorUDFAdaptor(cdecimal1 BETWEEN -2000 AND 4390.1351351351) -> 4:boolean) -> 5:boolean + projectedOutputColumns: [4] + selectExpressions: VectorUDFAdaptor(cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351) -> 4:boolean Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) Group By Vectorization: - aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 4:long) -> bigint + aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 5:long) -> bigint className: VectorGroupByOperator vectorOutput: true - keyExpressions: col 5 + keyExpressions: col 4 native: false projectedOutputColumns: [0] keys: _col0 (type: boolean)