diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/FixedBucketPruningOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/FixedBucketPruningOptimizer.java index 334b8e9bab..00cea086b2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/FixedBucketPruningOptimizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/FixedBucketPruningOptimizer.java @@ -170,16 +170,15 @@ protected void generatePredicate(NodeProcessorCtx procCtx, } else if (expr.getOperator() == Operator.AND) { boolean found = false; for (ExpressionTree subExpr : expr.getChildren()) { - if (subExpr.getOperator() != Operator.LEAF) { - return; - } - // one of the branches is definitely a bucket-leaf - PredicateLeaf l = leaves.get(subExpr.getLeaf()); - if (bucketLeaves.contains(l)) { - if (!addLiteral(literals, l)) { - return; + if (subExpr.getOperator() == Operator.LEAF) { + // one of the branches is definitely a bucket-leaf + PredicateLeaf l = leaves.get(subExpr.getLeaf()); + if (bucketLeaves.contains(l)) { + if (!addLiteral(literals, l)) { + return; + } + found = true; } - found = true; } } if (!found) { diff --git ql/src/test/queries/clientpositive/bucketpruning1.q ql/src/test/queries/clientpositive/bucketpruning1.q index 0f797f7c1d..d867241e2d 100644 --- ql/src/test/queries/clientpositive/bucketpruning1.q +++ ql/src/test/queries/clientpositive/bucketpruning1.q @@ -10,6 +10,11 @@ CREATE TABLE srcbucket_pruned(key int, value string) partitioned by (ds string) -- cannot prune 2-key scenarios without a smarter optimizer CREATE TABLE srcbucket_unpruned(key int, value string) partitioned by (ds string) CLUSTERED BY (key,value) INTO 16 BUCKETS STORED AS TEXTFILE; +-- valid AND cases: when one conjunct of an AND is a condition on the bucket column, pruning should still work even if the other conjuncts are not bucket conditions + +explain extended +select * from srcbucket_pruned where key = 1 and value is not null; + -- good cases explain extended diff --git ql/src/test/results/clientpositive/llap/bucketpruning1.q.out 
ql/src/test/results/clientpositive/llap/bucketpruning1.q.out index cc637db05b..d5f9a6b4c3 100644 --- ql/src/test/results/clientpositive/llap/bucketpruning1.q.out +++ ql/src/test/results/clientpositive/llap/bucketpruning1.q.out @@ -14,6 +14,71 @@ POSTHOOK: query: CREATE TABLE srcbucket_unpruned(key int, value string) partitio POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@srcbucket_unpruned +PREHOOK: query: explain extended +select * from srcbucket_pruned where key = 1 and value is not null +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select * from srcbucket_pruned where key = 1 and value is not null +POSTHOOK: type: QUERY +OPTIMIZED SQL: SELECT CAST(1 AS INTEGER) AS `key`, `value`, `ds` +FROM `default`.`srcbucket_pruned` +WHERE `key` = 1 AND `value` IS NOT NULL +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcbucket_pruned + filterExpr: ((key = 1) and value is not null) (type: boolean) + buckets included: [13,] of 16 + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: PARTIAL + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key = 1) and value is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: 1 (type: int), value (type: string), ds (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: PARTIAL +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + 
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types int:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Execution mode: vectorized, llap + LLAP IO: unknown + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: explain extended select * from srcbucket_pruned where key = 1 PREHOOK: type: QUERY @@ -1310,6 +1375,7 @@ STAGE PLANS: TableScan alias: srcbucket_pruned filterExpr: ((key = 1) and (ds = '2008-04-08') and ((value = 'One') or (value = 'Two'))) (type: boolean) + buckets included: [13,] of 16 Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: PARTIAL GatherStats: false Filter Operator