diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index f4ae684..7b31aae 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -1170,4 +1170,6 @@ spark.query.negative.files=groupby2_map_skew_multi_distinct.q,\ groupby2_multi_distinct.q,\ groupby3_map_skew_multi_distinct.q,\ groupby3_multi_distinct.q,\ - groupby_grouping_sets7.q + groupby_grouping_sets7.q,\ + mapjoin_with_hint_groupby.q,\ + mapjoin_with_hint_union.q diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SparkMapJoinProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SparkMapJoinProcessor.java index 23ee3ae..fecb541 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SparkMapJoinProcessor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SparkMapJoinProcessor.java @@ -18,13 +18,30 @@ package org.apache.hadoop.hive.ql.optimizer; +import java.util.ArrayList; +import java.util.LinkedHashMap; import java.util.List; +import java.util.Map; +import java.util.Stack; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.ErrorMsg; +import org.apache.hadoop.hive.ql.exec.GroupByOperator; import org.apache.hadoop.hive.ql.exec.JoinOperator; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; +import org.apache.hadoop.hive.ql.exec.UnionOperator; +import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; +import org.apache.hadoop.hive.ql.lib.Dispatcher; +import org.apache.hadoop.hive.ql.lib.GraphWalker; +import org.apache.hadoop.hive.ql.lib.Node; +import org.apache.hadoop.hive.ql.lib.NodeProcessor; +import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import org.apache.hadoop.hive.ql.lib.Rule; +import org.apache.hadoop.hive.ql.lib.RuleRegExp; +import org.apache.hadoop.hive.ql.parse.GenMapRedWalker; +import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.JoinCondDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; @@ -83,4 +100,39 @@ public MapJoinOperator convertMapJoin(HiveConf conf, return mapJoinOp; } + + @Override + public ParseContext transform(ParseContext pactx) throws SemanticException { + pactx = super.transform(pactx); + + Map opRules = new LinkedHashMap(); + // Group by before MapJoin is not allowed. + opRules.put(new RuleRegExp("R0", + ReduceSinkOperator.getOperatorName() + "%" + GroupByOperator.getOperatorName() + + "%.*" + MapJoinOperator.getOperatorName() + "%"), + new OperatorNotAllowBeforeMapJoin()); + // Union before MapJoin is not allowed. + opRules.put(new RuleRegExp("R1", + UnionOperator.getOperatorName() + "%.*" + MapJoinOperator.getOperatorName() + "%"), + new OperatorNotAllowBeforeMapJoin()); + + Dispatcher disp = new DefaultRuleDispatcher(getDefault(), opRules, null); + + GraphWalker ogw = new GenMapRedWalker(disp); + ArrayList topNodes = new ArrayList(); + topNodes.addAll(pactx.getTopOps().values()); + ogw.startWalking(topNodes, null); + + return pactx; + } + + public static class OperatorNotAllowBeforeMapJoin implements NodeProcessor { + + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + + throw new SemanticException(ErrorMsg.OPERATOR_NOT_ALLOWED_WITH_MAPJOIN.getMsg()); + } + } } diff --git a/ql/src/test/queries/clientnegative/mapjoin_with_hint_groupby.q b/ql/src/test/queries/clientnegative/mapjoin_with_hint_groupby.q new file mode 100644 index 0000000..45ce88d --- /dev/null +++ b/ql/src/test/queries/clientnegative/mapjoin_with_hint_groupby.q @@ -0,0 +1,3 @@ +set hive.ignore.mapjoin.hint=false; +SELECT /*+ MAPJOIN(b) */ * FROM src a JOIN + (select key, count(*) from src group by key) b on a.key=b.key; diff --git a/ql/src/test/queries/clientnegative/mapjoin_with_hint_union.q b/ql/src/test/queries/clientnegative/mapjoin_with_hint_union.q new file mode 100644 index 0000000..49909d8 --- /dev/null +++ b/ql/src/test/queries/clientnegative/mapjoin_with_hint_union.q @@ -0,0 +1,3 @@ +set hive.ignore.mapjoin.hint=false; +SELECT /*+ MAPJOIN(b) */ * FROM src a JOIN + (select key, value from src union all select key, value from src) b on a.key=b.key; diff --git a/ql/src/test/results/clientnegative/mapjoin_with_hint_groupby.q.out b/ql/src/test/results/clientnegative/mapjoin_with_hint_groupby.q.out new file mode 100644 index 0000000..a2d9708 --- /dev/null +++ b/ql/src/test/results/clientnegative/mapjoin_with_hint_groupby.q.out @@ -0,0 +1 @@ +FAILED: SemanticException [Error 10227]: Not all clauses are supported with mapjoin hint. Please remove mapjoin hint. diff --git a/ql/src/test/results/clientnegative/mapjoin_with_hint_union.q.out b/ql/src/test/results/clientnegative/mapjoin_with_hint_union.q.out new file mode 100644 index 0000000..a2d9708 --- /dev/null +++ b/ql/src/test/results/clientnegative/mapjoin_with_hint_union.q.out @@ -0,0 +1 @@ +FAILED: SemanticException [Error 10227]: Not all clauses are supported with mapjoin hint. Please remove mapjoin hint. diff --git a/ql/src/test/results/clientnegative/spark/mapjoin_with_hint_groupby.q.out b/ql/src/test/results/clientnegative/spark/mapjoin_with_hint_groupby.q.out new file mode 100644 index 0000000..a2d9708 --- /dev/null +++ b/ql/src/test/results/clientnegative/spark/mapjoin_with_hint_groupby.q.out @@ -0,0 +1 @@ +FAILED: SemanticException [Error 10227]: Not all clauses are supported with mapjoin hint. Please remove mapjoin hint. diff --git a/ql/src/test/results/clientnegative/spark/mapjoin_with_hint_union.q.out b/ql/src/test/results/clientnegative/spark/mapjoin_with_hint_union.q.out new file mode 100644 index 0000000..a2d9708 --- /dev/null +++ b/ql/src/test/results/clientnegative/spark/mapjoin_with_hint_union.q.out @@ -0,0 +1 @@ +FAILED: SemanticException [Error 10227]: Not all clauses are supported with mapjoin hint. Please remove mapjoin hint.