diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 441b278..8773bd3 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -318,6 +318,7 @@ minitez.query.files.shared=alter_merge_2_orc.q,\ minitez.query.files=bucket_map_join_tez1.q,\ bucket_map_join_tez2.q,\ + constprog_dpp.q,\ dynamic_partition_pruning.q,\ dynamic_partition_pruning_2.q,\ explainuser_1.q,\ diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java index b5ee4ef..dd53ced 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java @@ -125,6 +125,7 @@ public ParseContext transform(ParseContext pactx) throws SemanticException { } opToDelete.getParentOperators().get(0).removeChildAndAdoptItsChildren(opToDelete); } + cppCtx.getOpToDelete().clear(); return pGraphContext; } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcCtx.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcCtx.java index f30e330..d0b10c3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcCtx.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcCtx.java @@ -20,12 +20,12 @@ import java.io.Serializable; -import java.util.ArrayList; import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; -import java.util.List; import java.util.Map; import java.util.Map.Entry; +import java.util.Set; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.ql.exec.ColumnInfo; @@ -53,7 +53,7 @@ .getLog(ConstantPropagateProcCtx.class); private final Map, Map> opToConstantExprs; - private final List> opToDelete; + private final Set> opToDelete; private ConstantPropagateOption constantPropagateOption = ConstantPropagateOption.FULL; public ConstantPropagateProcCtx() { @@ -63,7 +63,7 @@ public ConstantPropagateProcCtx() { public ConstantPropagateProcCtx(ConstantPropagateOption option) { opToConstantExprs = new HashMap, Map>(); - opToDelete = new ArrayList>(); + opToDelete = new HashSet>(); this.constantPropagateOption = option; } @@ -193,7 +193,7 @@ public void addOpToDelete(Operator op) { opToDelete.add(op); } - public List> getOpToDelete() { + public Set> getOpToDelete() { return opToDelete; } diff --git ql/src/test/queries/clientpositive/constprog_dpp.q ql/src/test/queries/clientpositive/constprog_dpp.q new file mode 100644 index 0000000..f1c9b13 --- /dev/null +++ ql/src/test/queries/clientpositive/constprog_dpp.q @@ -0,0 +1,17 @@ +set hive.execution.engine=tez; +set hive.optimize.constant.propagation=true; +set hive.tez.dynamic.partition.pruning=true; + +drop table if exists tb1; +create table tb1 (id int); + +drop table if exists tb2; +create table tb2 (id smallint); + +explain +select a.id from tb1 a +left outer join +(select id from tb2 +union all +select 2 as id from tb2 limit 1) b +on a.id=b.id; \ No newline at end of file diff --git ql/src/test/results/clientpositive/tez/constprog_dpp.q.out ql/src/test/results/clientpositive/tez/constprog_dpp.q.out new file mode 100644 index 0000000..b2b2371 --- /dev/null +++ ql/src/test/results/clientpositive/tez/constprog_dpp.q.out @@ -0,0 +1,113 @@ +PREHOOK: query: drop table if exists tb1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists tb1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table tb1 (id int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tb1 +POSTHOOK: query: create table tb1 (id int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tb1 +PREHOOK: query: drop table if exists tb2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists tb2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table tb2 (id smallint) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tb2 +POSTHOOK: query: create table tb2 (id smallint) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tb2 +PREHOOK: query: explain +select a.id from tb1 a +left outer join +(select id from tb2 +union all +select 2 as id from tb2 limit 1) b +on a.id=b.id +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.id from tb1 a +left outer join +(select id from tb2 +union all +select 2 as id from tb2 limit 1) b +on a.id=b.id +POSTHOOK: type: QUERY +Plan not optimized by CBO due to missing statistics. Please check log for more details. + +Vertex dependency in root stage +Map 1 <- Union 2 (CONTAINS) +Map 5 <- Union 2 (CONTAINS) +Reducer 3 <- Union 2 (SIMPLE_EDGE) +Reducer 4 <- Map 6 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 4 + File Output Operator [FS_16] + compressed:false + Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Merge Join Operator [MERGEJOIN_20] + | condition map:[{"":"Left Outer Join0 to 1"}] + | keys:{"1":"_col0 (type: int)","0":"id (type: int)"} + | outputColumnNames:["_col0"] + | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + |<-Map 6 [SIMPLE_EDGE] + | Reduce Output Operator [RS_12] + | key expressions:id (type: int) + | Map-reduce partition columns:id (type: int) + | sort order:+ + | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + | TableScan [TS_11] + | alias:a + | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + |<-Reducer 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_13] + key expressions:_col0 (type: int) + Map-reduce partition columns:_col0 (type: int) + sort order:+ + Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Limit [LIM_10] + Number of rows:1 + Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator [SEL_9] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + |<-Union 2 [SIMPLE_EDGE] + |<-Map 1 [CONTAINS] + | Reduce Output Operator [RS_8] + | sort order: + | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + | value expressions:_col0 (type: int) + | Limit [LIM_7] + | Number of rows:1 + | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + | Select Operator [SEL_1] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + | TableScan [TS_0] + | alias:tb2 + | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + |<-Map 5 [CONTAINS] + Reduce Output Operator [RS_8] + sort order: + Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + value expressions:_col0 (type: int) + Limit [LIM_7] + Number of rows:1 + Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator [SEL_3] + outputColumnNames:["_col0"] + Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + TableScan [TS_2] + alias:tb2 + Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE +