diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 3f95be2..7afe902 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -461,7 +461,7 @@ public VectorExpression getVectorExpression(ExprNodeDesc exprDesc, Mode mode) th ve = getColumnVectorExpression((ExprNodeColumnDesc) exprDesc, mode); } else if (exprDesc instanceof ExprNodeGenericFuncDesc) { ExprNodeGenericFuncDesc expr = (ExprNodeGenericFuncDesc) exprDesc; - if (isCustomUDF(expr) || isNonVectorizedPathUDF(expr)) { + if (isCustomUDF(expr) || isNonVectorizedPathUDF(expr, mode)) { ve = getCustomUDFExpression(expr); } else { @@ -750,7 +750,7 @@ private GenericUDF getGenericUDFForCast(TypeInfo castType) throws HiveException * Depending on performance requirements and frequency of use, these * may be implemented in the future with an optimized VectorExpression. */ - public static boolean isNonVectorizedPathUDF(ExprNodeGenericFuncDesc expr) { + public static boolean isNonVectorizedPathUDF(ExprNodeGenericFuncDesc expr, Mode mode) { GenericUDF gudf = expr.getGenericUDF(); if (gudf instanceof GenericUDFBridge) { GenericUDFBridge bridge = (GenericUDFBridge) gudf; @@ -792,6 +792,9 @@ public static boolean isNonVectorizedPathUDF(ExprNodeGenericFuncDesc expr) { || arg0Type(expr).equals("double") || arg0Type(expr).equals("float"))) { return true; + } else if (gudf instanceof GenericUDFBetween && (mode == Mode.PROJECTION)) { + // between has 4 args here, but can be vectorized like this + return true; } return false; } @@ -1194,7 +1197,7 @@ private VectorExpression getGenericUdfVectorExpression(GenericUDF udf, childExpr = castedChildren; //First handle special cases - if (udf instanceof GenericUDFBetween) { + if (udf instanceof GenericUDFBetween && mode == Mode.FILTER) { return getBetweenFilterExpression(childExpr, mode, returnType); } else if (udf instanceof GenericUDFIn) { return getInExpression(childExpr, mode, returnType); diff --git ql/src/test/queries/clientpositive/vector_between_in.q ql/src/test/queries/clientpositive/vector_between_in.q index d57f980..18a4f34 100644 --- ql/src/test/queries/clientpositive/vector_between_in.q +++ ql/src/test/queries/clientpositive/vector_between_in.q @@ -35,3 +35,35 @@ SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 BETWEEN -20 AND 45.9918918919 ORDER BY cdecimal1; SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351; + + + +-- projections + +EXPLAIN SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0; + +EXPLAIN SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0; + +EXPLAIN SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0; + +EXPLAIN SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0; + +SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0; + +SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0; + +SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0; + +SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0; + +-- look at comparisons + +set hive.vectorized.execution.enabled=false; + +SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0; + +SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0; + +SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0; + +SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0; diff --git ql/src/test/results/clientpositive/vector_between_in.q.out ql/src/test/results/clientpositive/vector_between_in.q.out index 4c3ed71..9f351b2 100644 --- ql/src/test/results/clientpositive/vector_between_in.q.out +++ ql/src/test/results/clientpositive/vector_between_in.q.out @@ -637,3 +637,307 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_date_test #### A masked pattern was here #### 6172 +PREHOOK: query: -- projections + +EXPLAIN SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0 +PREHOOK: type: QUERY +POSTHOOK: query: -- projections + +EXPLAIN SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: decimal_date_test + Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (cdate) IN (1969-10-26, 1969-07-14) (type: boolean) + outputColumnNames: _col0 + Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: boolean) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: boolean) + sort order: + + Map-reduce partition columns: _col0 (type: boolean) + Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: boolean) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: decimal_date_test + Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (cdecimal1) IN (2365.8945945946, 881.0135135135, -3367.6517567568) (type: boolean) + outputColumnNames: _col0 + Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: boolean) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: boolean) + sort order: + + Map-reduce partition columns: _col0 (type: boolean) + Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: boolean) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: decimal_date_test + Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cdate BETWEEN 1969-12-30 AND 1970-01-02 (type: boolean) + outputColumnNames: _col0 + Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: boolean) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: boolean) + sort order: + + Map-reduce partition columns: _col0 (type: boolean) + Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: boolean) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: decimal_date_test + Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 (type: boolean) + outputColumnNames: _col0 + Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: boolean) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: boolean) + sort order: + + Map-reduce partition columns: _col0 (type: boolean) + Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: boolean) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_date_test +#### A masked pattern was here #### +POSTHOOK: query: SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_date_test +#### A masked pattern was here #### +NULL 6230 +false 6041 +true 17 +PREHOOK: query: SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_date_test +#### A masked pattern was here #### +POSTHOOK: query: SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_date_test +#### A masked pattern was here #### +NULL 3114 +false 9165 +true 9 +PREHOOK: query: SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_date_test +#### A masked pattern was here #### +POSTHOOK: query: SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_date_test +#### A masked pattern was here #### +NULL 6230 +false 5974 +true 84 +PREHOOK: query: SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_date_test +#### A masked pattern was here #### +POSTHOOK: query: SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_date_test +#### A masked pattern was here #### +NULL 3114 +false 3002 +true 6172 +PREHOOK: query: SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_date_test +#### A masked pattern was here #### +POSTHOOK: query: SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_date_test +#### A masked pattern was here #### +NULL 6230 +false 6041 +true 17 +PREHOOK: query: SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_date_test +#### A masked pattern was here #### +POSTHOOK: query: SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_date_test +#### A masked pattern was here #### +NULL 3114 +false 9165 +true 9 +PREHOOK: query: SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_date_test +#### A masked pattern was here #### +POSTHOOK: query: SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_date_test +#### A masked pattern was here #### +NULL 6230 +false 5974 +true 84 +PREHOOK: query: SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_date_test +#### A masked pattern was here #### +POSTHOOK: query: SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_date_test +#### A masked pattern was here #### +NULL 3114 +false 3002 +true 6172