diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index ad00b452e6..84ac4bb9d2 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -766,6 +766,7 @@ minillaplocal.query.files=\ tez_join_tests.q,\ tez_joins_explain.q,\ tez_multi_union.q,\ + tez_no_dynamic_semijoin_reduction_on_aggcol.q,\ tez_nway_join.q,\ tez_schema_evolution.q,\ tez_self_join.q,\ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java index 189c68dd91..91d91e52bd 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java @@ -605,7 +605,12 @@ private boolean generateSemiJoinOperatorPlan(DynamicListContext ctx, ParseContex Map colExprMap = new HashMap(); ExprNodeDesc exprNode = null; if ( parentOfRS.getColumnExprMap() != null) { - exprNode = parentOfRS.getColumnExprMap().get(internalColName).clone(); + ExprNodeDesc parentOfRSExpr = parentOfRS.getColumnExprMap().get(internalColName); + // GBY does not seem to have column mappings for the aggregations + if (parentOfRSExpr == null) { + return false; + } + exprNode = parentOfRSExpr.clone(); } else { exprNode = new ExprNodeColumnDesc(columnInfo); } diff --git a/ql/src/test/queries/clientpositive/tez_no_dynamic_semijoin_reduction_on_aggcol.q b/ql/src/test/queries/clientpositive/tez_no_dynamic_semijoin_reduction_on_aggcol.q new file mode 100644 index 0000000000..c19fedcf89 --- /dev/null +++ b/ql/src/test/queries/clientpositive/tez_no_dynamic_semijoin_reduction_on_aggcol.q @@ -0,0 +1,11 @@ +set hive.explain.user=false; +set hive.tez.dynamic.partition.pruning=true; +set hive.tez.dynamic.semijoin.reduction=true; +set hive.tez.bigtable.minsize.semijoin.reduction=1; +set hive.tez.min.bloom.filter.entries=1; + +create table tez_no_dynpart_hashjoin_on_agg(id int, outcome string, eventid int) stored as orc; + +explain select a.id, b.outcome from (select id, max(eventid) as event_id_max from tez_no_dynpart_hashjoin_on_agg group by id) a +LEFT OUTER JOIN tez_no_dynpart_hashjoin_on_agg b +on a.event_id_max = b.eventid; diff --git a/ql/src/test/results/clientpositive/llap/tez_no_dynamic_semijoin_reduction_on_aggcol.q.out b/ql/src/test/results/clientpositive/llap/tez_no_dynamic_semijoin_reduction_on_aggcol.q.out new file mode 100644 index 0000000000..5a331bd6ad --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/tez_no_dynamic_semijoin_reduction_on_aggcol.q.out @@ -0,0 +1,122 @@ +PREHOOK: query: create table tez_no_dynpart_hashjoin_on_agg(id int, outcome string, eventid int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tez_no_dynpart_hashjoin_on_agg +POSTHOOK: query: create table tez_no_dynpart_hashjoin_on_agg(id int, outcome string, eventid int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tez_no_dynpart_hashjoin_on_agg +PREHOOK: query: explain select a.id, b.outcome from (select id, max(eventid) as event_id_max from tez_no_dynpart_hashjoin_on_agg group by id) a +LEFT OUTER JOIN tez_no_dynpart_hashjoin_on_agg b +on a.event_id_max = b.eventid +PREHOOK: type: QUERY +PREHOOK: Input: default@tez_no_dynpart_hashjoin_on_agg +#### A masked pattern was here #### +POSTHOOK: query: explain select a.id, b.outcome from (select id, max(eventid) as event_id_max from tez_no_dynpart_hashjoin_on_agg group by id) a +LEFT OUTER JOIN tez_no_dynpart_hashjoin_on_agg b +on a.event_id_max = b.eventid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tez_no_dynpart_hashjoin_on_agg +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 4 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tez_no_dynpart_hashjoin_on_agg + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: int), eventid (type: int) + outputColumnNames: id, eventid + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(eventid) + keys: id (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: b + filterExpr: eventid is not null (type: boolean) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: eventid is not null (type: boolean) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: outcome (type: string), eventid (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink +