diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index eb0d1d7..a9c787a 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -151,6 +151,7 @@ minitez.query.files.shared=acid_globallimit.q,\ load_dyn_part1.q,\ load_dyn_part2.q,\ load_dyn_part3.q,\ + mapjoin2.q,\ mapjoin_mapjoin.q,\ mapreduce1.q,\ mapreduce2.q,\ diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 96ad809..de6b92e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -55,7 +55,6 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsAction; -import org.apache.hadoop.hive.ql.plan.AlterTableDesc; import org.apache.hadoop.hive.common.FileUtils; import org.apache.hadoop.hive.common.ObjectPair; import org.apache.hadoop.hive.common.StatsSetupConst; @@ -154,6 +153,7 @@ import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowFunctionSpec; import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowSpec; import org.apache.hadoop.hive.ql.plan.AggregationDesc; +import org.apache.hadoop.hive.ql.plan.AlterTableDesc; import org.apache.hadoop.hive.ql.plan.AlterTableDesc.AlterTableTypes; import org.apache.hadoop.hive.ql.plan.CreateTableDesc; import org.apache.hadoop.hive.ql.plan.CreateTableLikeDesc; @@ -7949,7 +7949,8 @@ private Operator genJoinReduceSinkChild(QB qb, ExprNodeDesc[] joinKeys, // backtrack can be null when input is script operator ExprNodeDesc exprBack = ExprNodeDescUtils.backtrack(expr, dummy, child); int kindex = exprBack == null ? -1 : ExprNodeDescUtils.indexOf(exprBack, reduceKeysBack); - if (kindex >= 0) { + // expr with column info needs to be the same too + if (kindex >= 0 && expr.isSame(reduceKeys.get(kindex))) { ColumnInfo newColInfo = new ColumnInfo(colInfo); newColInfo.setInternalName(Utilities.ReduceField.KEY + ".reducesinkkey" + kindex); newColInfo.setTabAlias(nm[0]); @@ -7961,7 +7962,8 @@ private Operator genJoinReduceSinkChild(QB qb, ExprNodeDesc[] joinKeys, continue; } int vindex = exprBack == null ? -1 : ExprNodeDescUtils.indexOf(exprBack, reduceValuesBack); - if (vindex >= 0) { + // expr with column info needs to be the same too + if (vindex >= 0 && expr.isSame(reduceValues.get(vindex))) { index[i] = -vindex - 1; continue; } diff --git ql/src/test/queries/clientpositive/mapjoin2.q ql/src/test/queries/clientpositive/mapjoin2.q index fcc3bca..068ecbe 100644 --- ql/src/test/queries/clientpositive/mapjoin2.q +++ ql/src/test/queries/clientpositive/mapjoin2.q @@ -10,3 +10,11 @@ select a.n, a.t, isnull(b.n), isnull(b.t) from (select * from tbl where n = 1) a select isnull(a.n), isnull(a.t), b.n, b.t from (select * from tbl where 2 = 1) a right outer join (select * from tbl where n = 2) b on a.n = b.n; select isnull(a.n), isnull(a.t), isnull(b.n), isnull(b.t) from (select * from tbl where n = 1) a full outer join (select * from tbl where n = 2) b on a.n = b.n; + +select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key; + +select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a left outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key; + +select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a right outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key; + +select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a full outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key; diff --git ql/src/test/results/clientpositive/mapjoin2.q.out ql/src/test/results/clientpositive/mapjoin2.q.out index 7bf3e6a..5ae1ac7 100644 --- ql/src/test/results/clientpositive/mapjoin2.q.out +++ ql/src/test/results/clientpositive/mapjoin2.q.out @@ -54,3 +54,39 @@ POSTHOOK: Input: default@tbl #### A masked pattern was here #### false false true true true true false false +PREHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +11 1 1 0 0 +PREHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a left outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a left outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +11 1 1 0 0 +PREHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a right outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a right outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +11 1 1 0 0 +PREHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a full outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a full outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +11 1 1 0 0 diff --git ql/src/test/results/clientpositive/tez/mapjoin2.q.out ql/src/test/results/clientpositive/tez/mapjoin2.q.out new file mode 100644 index 0000000..5ae1ac7 --- /dev/null +++ ql/src/test/results/clientpositive/tez/mapjoin2.q.out @@ -0,0 +1,92 @@ +PREHOOK: query: create table tbl (n bigint, t string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl +POSTHOOK: query: create table tbl (n bigint, t string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl +PREHOOK: query: insert into tbl values (1, 'one') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@tbl +POSTHOOK: query: insert into tbl values (1, 'one') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@tbl +POSTHOOK: Lineage: tbl.n EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: tbl.t SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: insert into tbl values(2, 'two') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@tbl +POSTHOOK: query: insert into tbl values(2, 'two') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@tbl +POSTHOOK: Lineage: tbl.n EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: tbl.t SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: select a.n, a.t, isnull(b.n), isnull(b.t) from (select * from tbl where n = 1) a left outer join (select * from tbl where 1 = 2) b on a.n = b.n +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl +#### A masked pattern was here #### +POSTHOOK: query: select a.n, a.t, isnull(b.n), isnull(b.t) from (select * from tbl where n = 1) a left outer join (select * from tbl where 1 = 2) b on a.n = b.n +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl +#### A masked pattern was here #### +1 one true true +PREHOOK: query: select isnull(a.n), isnull(a.t), b.n, b.t from (select * from tbl where 2 = 1) a right outer join (select * from tbl where n = 2) b on a.n = b.n +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl +#### A masked pattern was here #### +POSTHOOK: query: select isnull(a.n), isnull(a.t), b.n, b.t from (select * from tbl where 2 = 1) a right outer join (select * from tbl where n = 2) b on a.n = b.n +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl +#### A masked pattern was here #### +true true 2 two +PREHOOK: query: select isnull(a.n), isnull(a.t), isnull(b.n), isnull(b.t) from (select * from tbl where n = 1) a full outer join (select * from tbl where n = 2) b on a.n = b.n +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl +#### A masked pattern was here #### +POSTHOOK: query: select isnull(a.n), isnull(a.t), isnull(b.n), isnull(b.t) from (select * from tbl where n = 1) a full outer join (select * from tbl where n = 2) b on a.n = b.n +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl +#### A masked pattern was here #### +false false true true +true true false false +PREHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +11 1 1 0 0 +PREHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a left outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a left outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +11 1 1 0 0 +PREHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a right outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a right outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +11 1 1 0 0 +PREHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a full outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a full outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +11 1 1 0 0