diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java index bae80f3c26..019372bc0a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java @@ -1158,15 +1158,18 @@ public static MapJoinDesc getMapJoinDesc(HiveConf hconf, } Map> filters = desc.getFilters(); - Map> newFilters = new HashMap>(); - for (Map.Entry> entry : filters.entrySet()) { - byte srcTag = entry.getKey(); - List filter = entry.getValue(); - - Operator terminal = oldReduceSinkParentOps.get(srcTag); - newFilters.put(srcTag, ExprNodeDescUtils.backtrack(filter, op, terminal)); + if(adjustParentsChildren) { + // backtrack and update filter expressions only if RS is to be removed + Map> newFilters = new HashMap>(); + for (Map.Entry> entry : filters.entrySet()) { + byte srcTag = entry.getKey(); + List filter = entry.getValue(); + + Operator terminal = oldReduceSinkParentOps.get(srcTag); + newFilters.put(srcTag, ExprNodeDescUtils.backtrack(filter, op, terminal)); + } + desc.setFilters(filters = newFilters); } - desc.setFilters(filters = newFilters); // create dumpfile prefix needed to create descriptor String dumpFilePrefix = ""; diff --git a/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_1.q b/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_1.q index ea3dfcea11..47c00381d6 100644 --- a/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_1.q +++ b/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_1.q @@ -60,6 +60,22 @@ set hive.auto.convert.join.noconditionaltask.size=20000; set hive.exec.reducers.bytes.per.reducer=20000; set hive.stats.fetch.column.stats=false; -- Try with dynamically partitioned hashjoin + +-- hashjoin with filter +explain select + * +from alltypesorc a left outer join alltypesorc b on a.cint = b.cint and a.csmallint != a.cint +where + a.cint between 1000000 and 3000000 +order by a.cint; + +select + * +from alltypesorc a left outer join alltypesorc b on a.cint = b.cint and a.csmallint != a.cint +where + a.cint between 1000000 and 3000000 +order by a.cint; + explain select * diff --git a/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_1.q.out b/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_1.q.out index cfa87a7485..d204b47499 100644 --- a/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_1.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_1.q.out @@ -415,6 +415,146 @@ NULL 6 -8915 1 -3799 1 10782 1 +PREHOOK: query: explain select + * +from alltypesorc a left outer join alltypesorc b on a.cint = b.cint and a.csmallint != a.cint +where + a.cint between 1000000 and 3000000 +order by a.cint +PREHOOK: type: QUERY +POSTHOOK: query: explain select + * +from alltypesorc a left outer join alltypesorc b on a.cint = b.cint and a.csmallint != a.cint +where + a.cint between 1000000 and 3000000 +order by a.cint +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + filterExpr: cint BETWEEN 1000000 AND 3000000 (type: boolean) + Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: cint BETWEEN 1000000 AND 3000000 (type: boolean) + Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: b + filterExpr: cint BETWEEN 1000000 AND 3000000 (type: boolean) + Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: cint BETWEEN 1000000 AND 3000000 (type: boolean) + Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Map Join Operator + condition map: + Left Outer Join 0 to 1 + filter predicates: + 0 {(UDFToInteger(VALUE._col1) <> KEY.reducesinkkey0)} + 1 + keys: + 0 KEY.reducesinkkey0 (type: int) + 1 KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + input vertices: + 1 Map 4 + Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean), _col12 (type: tinyint), _col13 (type: smallint), _col14 (type: int), _col15 (type: bigint), _col16 (type: float), _col17 (type: double), _col18 (type: string), _col19 (type: string), _col20 (type: timestamp), _col21 (type: timestamp), _col22 (type: boolean), _col23 (type: boolean) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), KEY.reducesinkkey0 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: double), VALUE._col5 (type: string), VALUE._col6 (type: string), VALUE._col7 (type: timestamp), VALUE._col8 (type: timestamp), VALUE._col9 (type: boolean), VALUE._col10 (type: boolean), VALUE._col11 (type: tinyint), VALUE._col12 (type: smallint), VALUE._col13 (type: int), VALUE._col14 (type: bigint), VALUE._col15 (type: float), VALUE._col16 (type: double), VALUE._col17 (type: string), VALUE._col18 (type: string), VALUE._col19 (type: timestamp), VALUE._col20 (type: timestamp), VALUE._col21 (type: boolean), VALUE._col22 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + * +from alltypesorc a left outer join alltypesorc b on a.cint = b.cint and a.csmallint != a.cint +where + a.cint between 1000000 and 3000000 +order by a.cint +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select + * +from alltypesorc a left outer join alltypesorc b on a.cint = b.cint and a.csmallint != a.cint +where + a.cint between 1000000 and 3000000 +order by a.cint +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +11 NULL 1000828 1531084669 11.0 NULL wM316f6NqGIkoP388j3F6 poWQQo3Upvt3Wh 1969-12-31 16:00:02.351 NULL false true NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL -3799 1248059 1864027286 NULL -3799.0 Uhps6mMh3IfHB3j7yH62K 4KWs6gw7lv2WYd66P NULL 1969-12-31 15:59:54.622 false true NULL -3799 1248059 1864027286 NULL -3799.0 Uhps6mMh3IfHB3j7yH62K 4KWs6gw7lv2WYd66P NULL 1969-12-31 15:59:54.622 false true +NULL 10782 1286921 1864027286 NULL 10782.0 ODLrXI8882q8LS8 4KWs6gw7lv2WYd66P NULL 1969-12-31 15:59:52.138 true true NULL 10782 1286921 1864027286 NULL 10782.0 ODLrXI8882q8LS8 4KWs6gw7lv2WYd66P NULL 1969-12-31 15:59:52.138 true true +NULL -13036 1288927 -1645852809 NULL -13036.0 yinBY725P7V2 xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:00.763 true false NULL -13036 1288927 -1645852809 NULL -13036.0 yinBY725P7V2 xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:00.763 true false +11 NULL 1310786 -413875656 11.0 NULL W0rvA4H1xn0xMG4uk0 8yVVjG 1969-12-31 16:00:02.351 NULL false true NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +-51 NULL 2089466 -240556350 -51.0 NULL cXX24dH7tblSj46j2g C31eea0wrHHqvj 1969-12-31 16:00:08.451 NULL true true NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL -8915 2101183 1864027286 NULL -8915.0 x7By66525 4KWs6gw7lv2WYd66P NULL 1969-12-31 16:00:05.831 false true NULL -8915 2101183 1864027286 NULL -8915.0 x7By66525 4KWs6gw7lv2WYd66P NULL 1969-12-31 16:00:05.831 false true +8 NULL 2229621 -381406148 8.0 NULL q7onkS7QRPh5ghOK oKb0bi 1969-12-31 16:00:15.892 NULL true false NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +8 NULL 2433892 -1611863517 8.0 NULL 674ILv3V2TxFqXP6wSbL VLprkK2XfX 1969-12-31 16:00:15.892 NULL false true NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +-51 NULL 2949963 -1580871111 -51.0 NULL 0K68k3bdl7jO7 TPPAu 1969-12-31 16:00:08.451 NULL true false NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL PREHOOK: query: explain select * diff --git a/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_3.q.out b/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_3.q.out index 2a03d374e0..990e3572f2 100644 --- a/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_3.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_3.q.out @@ -182,13 +182,13 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) Reducer 3 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Map Join Operator condition map: Left Outer Join 0 to 1 filter predicates: - 0 {(_col2 < 100)} + 0 {(KEY.reducesinkkey0 < 100)} 1 keys: 0 KEY.reducesinkkey0 (type: int)