diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 772113acda..f53e7a5030 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -587,6 +587,7 @@ minillaplocal.query.files=acid_globallimit.q,\ semijoin.q,\ semijoin_hint.q,\ smb_cache.q,\ + smb_join_distinct.q,\ special_character_in_tabnames_1.q,\ sqlmerge.q,\ stats_based_fetch_decision.q,\ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java index f6f2dd05dd..eee8cf6292 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java @@ -296,7 +296,7 @@ private void initializeSourceForTag(ReduceWork redWork, int tag, ObjectInspector // Note this behavior may have to change if we ever implement a vectorized merge join boolean vectorizedRecordSource = (tag == bigTablePosition) && redWork.getVectorMode(); sources[tag].init(jconf, redWork.getReducer(), vectorizedRecordSource, keyTableDesc, - valueTableDesc, reader, tag == bigTablePosition, (byte) tag, + valueTableDesc, reader, tag == bigTablePosition, redWork.getNeedsTagging() ? 
(byte) tag : 0, redWork.getVectorizedRowBatchCtx(), redWork.getVectorizedVertexNum()); ois[tag] = sources[tag].getObjectInspector(); } diff --git ql/src/test/queries/clientpositive/smb_join_distinct.q ql/src/test/queries/clientpositive/smb_join_distinct.q new file mode 100644 index 0000000000..e0129abd6a --- /dev/null +++ ql/src/test/queries/clientpositive/smb_join_distinct.q @@ -0,0 +1,15 @@ +set hive.auto.convert.sortmerge.join=true; +set hive.auto.convert.join=false; + +create table t1 (a int, b string); +create table t2 (a int, b string); +insert into t1 values (81, 'one'), (82, 'two'); +insert into t2 values (91, 'one'), (92, 'two'); + +set hive.explain.user=true; +explain select * from (select distinct a from t1) as t1a, (select distinct a from t2) as t2a where t1a.a = t2a.a; +set hive.explain.user=false; +explain select * from (select distinct a from t1) as t1a, (select distinct a from t2) as t2a where t1a.a = t2a.a; + +select * from (select distinct a from t1) as t1a, (select distinct a from t2) as t2a where t1a.a = t2a.a; + diff --git ql/src/test/results/clientpositive/llap/smb_join_distinct.q.out ql/src/test/results/clientpositive/llap/smb_join_distinct.q.out new file mode 100644 index 0000000000..f831d228bc --- /dev/null +++ ql/src/test/results/clientpositive/llap/smb_join_distinct.q.out @@ -0,0 +1,174 @@ +PREHOOK: query: create table t1 (a int, b string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t1 +POSTHOOK: query: create table t1 (a int, b string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t1 +PREHOOK: query: create table t2 (a int, b string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t2 +POSTHOOK: query: create table t2 (a int, b string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t2 +PREHOOK: query: insert into t1 values (81, 'one'), (82, 'two') +PREHOOK: type: 
QUERY +PREHOOK: Output: default@t1 +POSTHOOK: query: insert into t1 values (81, 'one'), (82, 'two') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t1 +POSTHOOK: Lineage: t1.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: t1.b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: insert into t2 values (91, 'one'), (92, 'two') +PREHOOK: type: QUERY +PREHOOK: Output: default@t2 +POSTHOOK: query: insert into t2 values (91, 'one'), (92, 'two') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t2 +POSTHOOK: Lineage: t2.a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: t2.b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: explain select * from (select distinct a from t1) as t1a, (select distinct a from t2) as t2a where t1a.a = t2a.a +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from (select distinct a from t1) as t1a, (select distinct a from t2) as t2a where t1a.a = t2a.a +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 llap + File Output Operator [FS_18] + Merge Join Operator [MERGEJOIN_23] (rows=1 width=6) + Conds:GBY_5._col0=GBY_12._col0(Inner),Output:["_col0","_col1"] + <-Group By Operator [GBY_12] (rows=1 width=6) + Output:["_col0"],keys:KEY._col0 + <-Group By Operator [GBY_5] (rows=1 width=6) + Output:["_col0"],keys:KEY._col0 + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_4] + PartitionCols:_col0 + Group By Operator [GBY_3] (rows=2 width=6) + Output:["_col0"],keys:a + Filter Operator [FIL_21] (rows=2 width=6) + predicate:a is not null + TableScan [TS_0] (rows=2 width=6) + default@t1,t1,Tbl:COMPLETE,Col:NONE,Output:["a"] + <-Map 3 [SIMPLE_EDGE] llap + SHUFFLE [RS_11] + PartitionCols:_col0 + Group By Operator [GBY_10] (rows=2 width=6) + Output:["_col0"],keys:a + Filter Operator [FIL_22] (rows=2 width=6) + predicate:a is not null + TableScan [TS_7] (rows=2 width=6) + default@t2,t2,Tbl:COMPLETE,Col:NONE,Output:["a"] + +PREHOOK: query: explain select * from (select distinct a from t1) as t1a, (select distinct a from t2) as t2a where t1a.a = t2a.a +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from (select distinct a from t1) as t1a, (select distinct a from t2) as t2a where t1a.a = t2a.a +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 2 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: a is not null (type: boolean) + Statistics: Num rows: 2 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: a (type: int) + mode: hash + outputColumnNames: 
_col0 + Statistics: Num rows: 2 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: t2 + Statistics: Num rows: 2 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: a is not null (type: boolean) + Statistics: Num rows: 2 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: a (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from (select distinct a from t1) as t1a, (select distinct a from t2) as t2a where t1a.a = t2a.a +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: select * from (select distinct a from t1) as t1a, (select distinct a from t2) as t2a where t1a.a = t2a.a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here ####