diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 05bdb24cf3..30d7af64a5 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -516,6 +516,7 @@ minillaplocal.query.files=\ dynamic_semijoin_reduction_2.q,\ dynamic_semijoin_reduction_3.q,\ dynamic_semijoin_reduction_4.q,\ + dynamic_semijoin_reduction_on_aggcol.q,\ dynamic_semijoin_reduction_sw.q,\ dynpart_sort_opt_vectorization.q,\ dynpart_sort_optimization.q,\ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java index 189c68dd91..6686273b14 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java @@ -604,16 +604,11 @@ private boolean generateSemiJoinOperatorPlan(DynamicListContext ctx, ParseContex // Create the column expr map Map colExprMap = new HashMap(); ExprNodeDesc exprNode = null; - if ( parentOfRS.getColumnExprMap() != null) { - exprNode = parentOfRS.getColumnExprMap().get(internalColName).clone(); - } else { - exprNode = new ExprNodeColumnDesc(columnInfo); - } - - if (exprNode instanceof ExprNodeColumnDesc) { - ExprNodeColumnDesc encd = (ExprNodeColumnDesc) exprNode; - encd.setColumn(internalColName); + if (columnInfo == null) { + LOG.debug("No ColumnInfo found in {} for {}", parentOfRS.getOperatorId(), internalColName); + return false; } + exprNode = new ExprNodeColumnDesc(columnInfo); colExprMap.put(internalColName, exprNode); // Create the Select Operator diff --git a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_on_aggcol.q b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_on_aggcol.q new file mode 100644 index 0000000000..e7c8db3e77 --- /dev/null +++ b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_on_aggcol.q @@ -0,0 +1,17 @@ +--! qt:dataset:src +set hive.explain.user=false; +set hive.tez.dynamic.partition.pruning=true; +set hive.tez.dynamic.semijoin.reduction=true; +set hive.tez.bigtable.minsize.semijoin.reduction=1; +set hive.tez.min.bloom.filter.entries=1; + +create table dynamic_semijoin_reduction_on_aggcol(id int, outcome string, eventid int) stored as orc; +insert into dynamic_semijoin_reduction_on_aggcol select key, value, key from src; + +explain select a.id, b.outcome from (select id, max(eventid) as event_id_max from dynamic_semijoin_reduction_on_aggcol where id = 0 group by id) a +LEFT OUTER JOIN dynamic_semijoin_reduction_on_aggcol b +on a.event_id_max = b.eventid; + +select a.id, b.outcome from (select id, max(eventid) as event_id_max from dynamic_semijoin_reduction_on_aggcol where id = 0 group by id) a +LEFT OUTER JOIN dynamic_semijoin_reduction_on_aggcol b +on a.event_id_max = b.eventid; diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_on_aggcol.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_on_aggcol.q.out new file mode 100644 index 0000000000..9f2af56f42 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_on_aggcol.q.out @@ -0,0 +1,183 @@ +PREHOOK: query: create table dynamic_semijoin_reduction_on_aggcol(id int, outcome string, eventid int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dynamic_semijoin_reduction_on_aggcol +POSTHOOK: query: create table dynamic_semijoin_reduction_on_aggcol(id int, outcome string, eventid int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dynamic_semijoin_reduction_on_aggcol +PREHOOK: query: insert into dynamic_semijoin_reduction_on_aggcol select key, value, key from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dynamic_semijoin_reduction_on_aggcol +POSTHOOK: query: insert into dynamic_semijoin_reduction_on_aggcol select key, value, key from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dynamic_semijoin_reduction_on_aggcol +POSTHOOK: Lineage: dynamic_semijoin_reduction_on_aggcol.eventid EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dynamic_semijoin_reduction_on_aggcol.id EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dynamic_semijoin_reduction_on_aggcol.outcome SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: explain select a.id, b.outcome from (select id, max(eventid) as event_id_max from dynamic_semijoin_reduction_on_aggcol where id = 0 group by id) a +LEFT OUTER JOIN dynamic_semijoin_reduction_on_aggcol b +on a.event_id_max = b.eventid +PREHOOK: type: QUERY +PREHOOK: Input: default@dynamic_semijoin_reduction_on_aggcol +#### A masked pattern was here #### +POSTHOOK: query: explain select a.id, b.outcome from (select id, max(eventid) as event_id_max from dynamic_semijoin_reduction_on_aggcol where id = 0 group by id) a +LEFT OUTER JOIN dynamic_semijoin_reduction_on_aggcol b +on a.event_id_max = b.eventid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dynamic_semijoin_reduction_on_aggcol +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 5 <- Reducer 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: dynamic_semijoin_reduction_on_aggcol + filterExpr: (id = 0) (type: boolean) + Statistics: Num rows: 500 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (id = 0) (type: boolean) + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: eventid (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: max(_col1) + keys: true (type: boolean) + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: boolean) + sort order: + + Map-reduce partition columns: _col0 (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 5 + Map Operator Tree: + TableScan + alias: b + filterExpr: (eventid is not null and (eventid BETWEEN DynamicValue(RS_11_dynamic_semijoin_reduction_on_aggcol__col1_min) AND DynamicValue(RS_11_dynamic_semijoin_reduction_on_aggcol__col1_max) and in_bloom_filter(eventid, DynamicValue(RS_11_dynamic_semijoin_reduction_on_aggcol__col1_bloom_filter)))) (type: boolean) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((eventid BETWEEN DynamicValue(RS_11_dynamic_semijoin_reduction_on_aggcol__col1_min) AND DynamicValue(RS_11_dynamic_semijoin_reduction_on_aggcol__col1_max) and in_bloom_filter(eventid, DynamicValue(RS_11_dynamic_semijoin_reduction_on_aggcol__col1_bloom_filter))) and eventid is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: outcome (type: string), eventid (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + keys: KEY._col0 (type: boolean) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1) + minReductionHashAggr: 0.0 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 2 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select a.id, b.outcome from (select id, max(eventid) as event_id_max from dynamic_semijoin_reduction_on_aggcol where id = 0 group by id) a +LEFT OUTER JOIN dynamic_semijoin_reduction_on_aggcol b +on a.event_id_max = b.eventid +PREHOOK: type: QUERY +PREHOOK: Input: default@dynamic_semijoin_reduction_on_aggcol +#### A masked pattern was here #### +POSTHOOK: query: select a.id, b.outcome from (select id, max(eventid) as event_id_max from dynamic_semijoin_reduction_on_aggcol where id = 0 group by id) a +LEFT OUTER JOIN dynamic_semijoin_reduction_on_aggcol b +on a.event_id_max = b.eventid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dynamic_semijoin_reduction_on_aggcol +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0