diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/NullScanOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/NullScanOptimizer.java
index 49b839a351b36af7c17bb0508f17d370d0222db9..e4dacd1c76e5239c3fec4ace2c16518ad600a1dd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/NullScanOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/NullScanOptimizer.java
@@ -19,7 +19,11 @@ package org.apache.hadoop.hive.ql.optimizer.physical;
 import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.Iterator;
 import java.util.LinkedHashMap;
+import java.util.LinkedList;
+import java.util.List;
 import java.util.Map;
 import java.util.Stack;
 
@@ -75,6 +79,32 @@ public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException {
     return pctx;
   }
 
+  // We need to make sure that the null operator (LIM or FIL) is present in all branches of a
+  // multi-insert query before applying the optimization. This method does a full tree traversal
+  // starting from the TS and returns true only if it finds the target null operator on each branch.
+  static private boolean isNullOpPresentInAllBranches(TableScanOperator ts, Node causeOfNullNode) {
+    Node curNode = null;
+    List<? extends Node> curChd = null;
+    LinkedList<Node> middleNodes = new LinkedList<Node>();
+    middleNodes.addLast(ts);
+    while (!middleNodes.isEmpty()) {
+      curNode = middleNodes.remove();
+      curChd = curNode.getChildren();
+      for (Node chd : curChd) {
+        if (chd.getChildren() == null || chd.getChildren().isEmpty() || chd == causeOfNullNode) {
+          if (chd != causeOfNullNode) { // an end node that is not the limit-0/where-false operator
+            return false;
+          }
+        }
+        else {
+          middleNodes.addLast(chd);
+        }
+      }
+
+    }
+    return true;
+  }
+
   static private class WhereFalseProcessor implements NodeProcessor {
 
     @Override
@@ -91,22 +121,13 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
       return null;
     }
 
-    int numOfndPeers = 0;
-    if (filter.getParentOperators() != null) {
-      for (Operator<? extends OperatorDesc> fParent : filter.getParentOperators()) {
-        if (fParent.getChildOperators() != null) {
-          numOfndPeers += fParent.getChildOperators().size();
-          if (numOfndPeers > 1)
-            return null;
-        }
-      }
-    }
-
     WalkerCtx ctx = (WalkerCtx) procCtx;
     for (Node op : stack) {
       if (op instanceof TableScanOperator) {
-        ctx.setMayBeMetadataOnly((TableScanOperator)op);
-        LOG.info("Found where false TableScan. " + op);
+        if (isNullOpPresentInAllBranches((TableScanOperator)op, filter)) {
+          ctx.setMayBeMetadataOnly((TableScanOperator)op);
+          LOG.info("Found where false TableScan. " + op);
+        }
       }
     }
     ctx.convertMetadataOnly();
@@ -120,9 +141,18 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
   public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
       Object... nodeOutputs) throws SemanticException {
 
-    if(!(((LimitOperator)nd).getConf().getLimit() == 0)) {
+    LimitOperator limitOp = (LimitOperator) nd;
+    if (limitOp.getConf().getLimit() != 0) {
       return null;
     }
+
+    HashSet<TableScanOperator> tsOps = ((WalkerCtx) procCtx).getMayBeMetadataOnlyTableScans();
+    if (tsOps != null) {
+      for (Iterator<TableScanOperator> tsOp = tsOps.iterator(); tsOp.hasNext();) {
+        if (!isNullOpPresentInAllBranches(tsOp.next(), limitOp))
+          tsOp.remove();
+      }
+    }
     LOG.info("Found Limit 0 TableScan. " + nd);
" + nd); ((WalkerCtx)procCtx).convertMetadataOnly(); return null; diff --git a/ql/src/test/queries/clientpositive/inputwherefalse.q b/ql/src/test/queries/clientpositive/inputwherefalse.q index c9186e6727553d77e6657673c81fdd3326ed1950..4605ac65a09280fa875c1c5d7ebd3c0f71503136 100644 --- a/ql/src/test/queries/clientpositive/inputwherefalse.q +++ b/ql/src/test/queries/clientpositive/inputwherefalse.q @@ -4,6 +4,22 @@ select key where key=200 limit 1 insert overwrite directory 'target/warehouse/destemp.out/dir2/' select 'header' +limit 0 +insert overwrite directory 'target/warehouse/destemp.out/dir3/' +select key +where key = 100 limit 1; + +dfs -cat ${system:test.warehouse.dir}/destemp.out/dir1/000000_0; +dfs -cat ${system:test.warehouse.dir}/destemp.out/dir2/000000_0; +dfs -cat ${system:test.warehouse.dir}/destemp.out/dir3/000000_0; +dfs -rmr ${system:test.warehouse.dir}/destemp.out; + +From (select * from src) a +insert overwrite directory 'target/warehouse/destemp.out/dir1/' +select key +where key=200 limit 1 +insert overwrite directory 'target/warehouse/destemp.out/dir2/' +select 'header' where 1=2 insert overwrite directory 'target/warehouse/destemp.out/dir3/' select key @@ -13,3 +29,6 @@ dfs -cat ${system:test.warehouse.dir}/destemp.out/dir1/000000_0; dfs -cat ${system:test.warehouse.dir}/destemp.out/dir2/000000_0; dfs -cat ${system:test.warehouse.dir}/destemp.out/dir3/000000_0; dfs -rmr ${system:test.warehouse.dir}/destemp.out; + +explain select a.value from src a join src b on a.key = b.key where a.key=100 limit 0; +select a.value from src a join src b on a.key = b.key where a.key=100 limit 0; diff --git a/ql/src/test/results/clientpositive/inputwherefalse.q.out b/ql/src/test/results/clientpositive/inputwherefalse.q.out index ecb73001b17e3334c5eb144a28b11261426c6819..898fb93ba0daa13e7780d96c09dcdb615abf8042 100644 --- a/ql/src/test/results/clientpositive/inputwherefalse.q.out +++ b/ql/src/test/results/clientpositive/inputwherefalse.q.out @@ -4,6 +4,39 @@ select key where key=200 limit 1 insert overwrite directory 'target/warehouse/destemp.out/dir2/' select 'header' +limit 0 +insert overwrite directory 'target/warehouse/destemp.out/dir3/' +select key +where key = 100 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: target/warehouse/destemp.out/dir1 +PREHOOK: Output: target/warehouse/destemp.out/dir2 +PREHOOK: Output: target/warehouse/destemp.out/dir3 +POSTHOOK: query: From (select * from src) a +insert overwrite directory 'target/warehouse/destemp.out/dir1/' +select key +where key=200 limit 1 +insert overwrite directory 'target/warehouse/destemp.out/dir2/' +select 'header' +limit 0 +insert overwrite directory 'target/warehouse/destemp.out/dir3/' +select key +where key = 100 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: target/warehouse/destemp.out/dir1 +POSTHOOK: Output: target/warehouse/destemp.out/dir2 +POSTHOOK: Output: target/warehouse/destemp.out/dir3 +200 +100 +#### A masked pattern was here #### +PREHOOK: query: From (select * from src) a +insert overwrite directory 'target/warehouse/destemp.out/dir1/' +select key +where key=200 limit 1 +insert overwrite directory 'target/warehouse/destemp.out/dir2/' +select 'header' where 1=2 insert overwrite directory 'target/warehouse/destemp.out/dir3/' select key @@ -31,3 +64,25 @@ POSTHOOK: Output: target/warehouse/destemp.out/dir3 200 100 #### A masked pattern was here #### +PREHOOK: query: explain select a.value from src a join src b on a.key = b.key where a.key=100 limit 0 
+PREHOOK: type: QUERY +POSTHOOK: query: explain select a.value from src a join src b on a.key = b.key where a.key=100 limit 0 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 0 + Processor Tree: + ListSink + +PREHOOK: query: select a.value from src a join src b on a.key = b.key where a.key=100 limit 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select a.value from src a join src b on a.key = b.key where a.key=100 limit 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here ####
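
Reviewer note: the heart of this change is the new isNullOpPresentInAllBranches() traversal, which lets a TableScanOperator go metadata-only just when every branch fanning out of it ends in the limit-0/where-false operator. Below is a minimal, self-contained sketch of that check against a toy node type; all names here (BranchCheckSketch, ToyNode, isNullOpOnAllBranches) are illustrative stand-ins, not the Hive API.

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Queue;

public class BranchCheckSketch {

  // Minimal stand-in for org.apache.hadoop.hive.ql.lib.Node.
  static class ToyNode {
    final String name;
    final List<ToyNode> children = new ArrayList<ToyNode>();
    ToyNode(String name) { this.name = name; }
    ToyNode add(ToyNode... chd) { children.addAll(Arrays.asList(chd)); return this; }
  }

  // BFS from the scan root: true only if every leaf reached is the designated null op.
  static boolean isNullOpOnAllBranches(ToyNode root, ToyNode causeOfNull) {
    Queue<ToyNode> pending = new ArrayDeque<ToyNode>();
    pending.add(root);
    while (!pending.isEmpty()) {
      for (ToyNode chd : pending.remove().children) {
        if (chd.children.isEmpty() || chd == causeOfNull) {
          if (chd != causeOfNull) {
            return false; // this branch ends in a real sink, not the null op
          }
        } else {
          pending.add(chd);
        }
      }
    }
    return true;
  }

  public static void main(String[] args) {
    // Multi-insert shape: one table scan fanning out to two branches.
    ToyNode limit0 = new ToyNode("LIM-0");
    ToyNode fileSink = new ToyNode("FS");
    ToyNode ts = new ToyNode("TS").add(limit0, fileSink);
    // false: the FS branch still emits rows, so the scan must stay as-is.
    System.out.println(isNullOpOnAllBranches(ts, limit0));
    // true: once every branch ends in the null op, metadata-only is safe.
    System.out.println(isNullOpOnAllBranches(new ToyNode("TS").add(limit0), limit0));
  }
}

Running main prints false, then true: a sibling branch that still writes rows vetoes the conversion. That is exactly the multi-insert case the removed numOfndPeers guard handled too conservatively, and it is why both WhereFalseProcessor and the Limit 0 path now call the traversal before (or after, in the limit case) marking a scan metadata-only.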