diff --git common/src/java/org/apache/hadoop/hive/common/jsonexplain/Vertex.java common/src/java/org/apache/hadoop/hive/common/jsonexplain/Vertex.java index b7dc88c..a73893f 100644 --- common/src/java/org/apache/hadoop/hive/common/jsonexplain/Vertex.java +++ common/src/java/org/apache/hadoop/hive/common/jsonexplain/Vertex.java @@ -74,7 +74,7 @@ public VertexType vertexType; public static enum EdgeType { - BROADCAST, SHUFFLE, MULTICAST, PARTITION_ONLY_SHUFFLE, FORWARD, UNKNOWN + BROADCAST, SHUFFLE, MULTICAST, PARTITION_ONLY_SHUFFLE, FORWARD, XPROD_EDGE, UNKNOWN }; public String edgeType; diff --git common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/TezJsonParser.java common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/TezJsonParser.java index 69e5358..b6cca10 100644 --- common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/TezJsonParser.java +++ common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/TezJsonParser.java @@ -36,6 +36,8 @@ public String mapEdgeType(String edgeName) { return "MULTICAST"; case "ONE_TO_ONE_EDGE": return "FORWARD"; + case "XPROD_EDGE": + return "XPROD_EDGE"; default: return "UNKNOWN"; } diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 9084785..34298de 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -3006,6 +3006,8 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal 0.5f, "The maximum fraction of JVM memory which Tez will reserve for the processor"), TEZ_TASK_SCALE_MEMORY_RESERVE_FRACTION("hive.tez.task.scale.memory.reserve.fraction", -1f, "The customized fraction of JVM memory which Tez will reserve for the processor"), + TEZ_CARTESIAN_PRODUCT_EDGE_ENABLED("hive.tez.cartesian-product.enabled", + false, "Use Tez cartesian product edge to speed up cross product"), // The default is different on the client and server, 
so it's null here. LLAP_IO_ENABLED("hive.llap.io.enabled", null, "Whether the LLAP IO layer is enabled."), LLAP_IO_TRACE_SIZE("hive.llap.io.trace.size", "2Mb", diff --git data/conf/llap/hive-site.xml data/conf/llap/hive-site.xml index 870b584..8cd5144 100644 --- data/conf/llap/hive-site.xml +++ data/conf/llap/hive-site.xml @@ -338,4 +338,9 @@ true + + hive.tez.cartesian-product.enabled + true + + diff --git data/conf/tez/hive-site.xml data/conf/tez/hive-site.xml index 35e8c99..f1dabf5 100644 --- data/conf/tez/hive-site.xml +++ data/conf/tez/hive-site.xml @@ -283,4 +283,9 @@ true + + hive.tez.cartesian-product.enabled + true + + diff --git itests/qtest/x.patch itests/qtest/x.patch new file mode 100644 index 0000000..acff0a5 --- /dev/null +++ itests/qtest/x.patch @@ -0,0 +1,8655 @@ +diff --git a/common/src/java/org/apache/hadoop/hive/common/jsonexplain/Vertex.java b/common/src/java/org/apache/hadoop/hive/common/jsonexplain/Vertex.java +index b7dc88c..a73893f 100644 +--- a/common/src/java/org/apache/hadoop/hive/common/jsonexplain/Vertex.java ++++ b/common/src/java/org/apache/hadoop/hive/common/jsonexplain/Vertex.java +@@ -74,7 +74,7 @@ + public VertexType vertexType; + + public static enum EdgeType { +- BROADCAST, SHUFFLE, MULTICAST, PARTITION_ONLY_SHUFFLE, FORWARD, UNKNOWN ++ BROADCAST, SHUFFLE, MULTICAST, PARTITION_ONLY_SHUFFLE, FORWARD, XPROD_EDGE, UNKNOWN + }; + public String edgeType; + +diff --git a/common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/TezJsonParser.java b/common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/TezJsonParser.java +index 69e5358..b6cca10 100644 +--- a/common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/TezJsonParser.java ++++ b/common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/TezJsonParser.java +@@ -36,6 +36,8 @@ public String mapEdgeType(String edgeName) { + return "MULTICAST"; + case "ONE_TO_ONE_EDGE": + return "FORWARD"; ++ case "XPROD_EDGE": ++ return "XPROD_EDGE"; + default: + return 
"UNKNOWN"; + } +diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +index 9084785..34298de 100644 +--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java ++++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +@@ -3006,6 +3006,8 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal + 0.5f, "The maximum fraction of JVM memory which Tez will reserve for the processor"), + TEZ_TASK_SCALE_MEMORY_RESERVE_FRACTION("hive.tez.task.scale.memory.reserve.fraction", + -1f, "The customized fraction of JVM memory which Tez will reserve for the processor"), ++ TEZ_CARTESIAN_PRODUCT_EDGE_ENABLED("hive.tez.cartesian-product.enabled", ++ false, "Use Tez cartesian product edge to speed up cross product"), + // The default is different on the client and server, so it's null here. + LLAP_IO_ENABLED("hive.llap.io.enabled", null, "Whether the LLAP IO layer is enabled."), + LLAP_IO_TRACE_SIZE("hive.llap.io.trace.size", "2Mb", +diff --git a/data/conf/llap/hive-site.xml b/data/conf/llap/hive-site.xml +index 870b584..8cd5144 100644 +--- a/data/conf/llap/hive-site.xml ++++ b/data/conf/llap/hive-site.xml +@@ -338,4 +338,9 @@ + true + + ++ ++ hive.tez.cartesian-product.enabled ++ true ++ ++ + +diff --git a/data/conf/tez/hive-site.xml b/data/conf/tez/hive-site.xml +index 35e8c99..f1dabf5 100644 +--- a/data/conf/tez/hive-site.xml ++++ b/data/conf/tez/hive-site.xml +@@ -283,4 +283,9 @@ + true + + ++ ++ hive.tez.cartesian-product.enabled ++ true ++ ++ + +diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties +index 5190f04..3f85b16 100644 +--- a/itests/src/test/resources/testconfiguration.properties ++++ b/itests/src/test/resources/testconfiguration.properties +@@ -138,6 +138,9 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\ + count_dist_rewrite.q,\ + create_merge_compressed.q,\ + cross_join.q,\ ++ 
cross_prod_1.q,\ ++ cross_prod_3.q,\ ++ cross_prod_4.q,\ + cross_product_check_1.q,\ + cross_product_check_2.q,\ + ctas.q,\ +@@ -504,6 +507,9 @@ minillaplocal.query.files=\ + correlationoptimizer4.q,\ + correlationoptimizer6.q,\ + disable_merge_for_bucketing.q,\ ++ cross_prod_1.q,\ ++ cross_prod_3.q,\ ++ cross_prod_4.q,\ + dynamic_partition_pruning.q,\ + dynamic_semijoin_reduction.q,\ + dynamic_semijoin_reduction_2.q,\ +diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java +index aae3480..5c338b8 100644 +--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java ++++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java +@@ -46,6 +46,9 @@ + import org.apache.tez.mapreduce.common.MRInputSplitDistributor; + import org.apache.tez.mapreduce.hadoop.InputSplitInfo; + import org.apache.tez.mapreduce.protos.MRRuntimeProtos; ++import org.apache.tez.runtime.library.api.Partitioner; ++import org.apache.tez.runtime.library.cartesianproduct.CartesianProductConfig; ++import org.apache.tez.runtime.library.cartesianproduct.CartesianProductEdgeManager; + import org.slf4j.Logger; + import org.slf4j.LoggerFactory; + import org.apache.hadoop.conf.Configuration; +@@ -135,6 +138,7 @@ + import org.apache.tez.runtime.library.conf.UnorderedKVEdgeConfig; + import org.apache.tez.runtime.library.conf.UnorderedPartitionedKVEdgeConfig; + import org.apache.tez.runtime.library.input.ConcatenatedMergedKeyValueInput; ++import org.apache.tez.runtime.library.cartesianproduct.CartesianProductVertexManager; + + /** + * DagUtils. 
DagUtils is a collection of helper methods to convert +@@ -264,7 +268,7 @@ private JobConf initializeVertexConf(JobConf baseConf, Context context, MapWork + */ + @SuppressWarnings("rawtypes") + public GroupInputEdge createEdge(VertexGroup group, JobConf vConf, Vertex w, +- TezEdgeProperty edgeProp, VertexType vertexType) ++ TezEdgeProperty edgeProp, BaseWork work, TezWork tezWork) + throws IOException { + + Class mergeInputClass; +@@ -279,7 +283,8 @@ public GroupInputEdge createEdge(VertexGroup group, JobConf vConf, Vertex w, + case CUSTOM_EDGE: { + mergeInputClass = ConcatenatedMergedKeyValueInput.class; + int numBuckets = edgeProp.getNumBuckets(); +- CustomVertexConfiguration vertexConf = new CustomVertexConfiguration(numBuckets, vertexType); ++ CustomVertexConfiguration vertexConf ++ = new CustomVertexConfiguration(numBuckets, tezWork.getVertexType(work)); + DataOutputBuffer dob = new DataOutputBuffer(); + vertexConf.write(dob); + VertexManagerPluginDescriptor desc = +@@ -299,6 +304,10 @@ public GroupInputEdge createEdge(VertexGroup group, JobConf vConf, Vertex w, + mergeInputClass = ConcatenatedMergedKeyValueInput.class; + break; + ++ case XPROD_EDGE: ++ mergeInputClass = ConcatenatedMergedKeyValueInput.class; ++ break; ++ + case SIMPLE_EDGE: + setupAutoReducerParallelism(edgeProp, w); + // fall through +@@ -308,7 +317,7 @@ public GroupInputEdge createEdge(VertexGroup group, JobConf vConf, Vertex w, + break; + } + +- return GroupInputEdge.create(group, w, createEdgeProperty(edgeProp, vConf), ++ return GroupInputEdge.create(group, w, createEdgeProperty(w, edgeProp, vConf, work, tezWork), + InputDescriptor.create(mergeInputClass.getName())); + } + +@@ -322,13 +331,14 @@ public GroupInputEdge createEdge(VertexGroup group, JobConf vConf, Vertex w, + * @return + */ + public Edge createEdge(JobConf vConf, Vertex v, Vertex w, TezEdgeProperty edgeProp, +- VertexType vertexType) ++ BaseWork work, TezWork tezWork) + throws IOException { + + switch(edgeProp.getEdgeType()) 
{ + case CUSTOM_EDGE: { + int numBuckets = edgeProp.getNumBuckets(); +- CustomVertexConfiguration vertexConf = new CustomVertexConfiguration(numBuckets, vertexType); ++ CustomVertexConfiguration vertexConf = ++ new CustomVertexConfiguration(numBuckets, tezWork.getVertexType(work)); + DataOutputBuffer dob = new DataOutputBuffer(); + vertexConf.write(dob); + VertexManagerPluginDescriptor desc = VertexManagerPluginDescriptor.create( +@@ -339,6 +349,9 @@ public Edge createEdge(JobConf vConf, Vertex v, Vertex w, TezEdgeProperty edgePr + w.setVertexManagerPlugin(desc); + break; + } ++ case XPROD_EDGE: ++ break; ++ + case SIMPLE_EDGE: { + setupAutoReducerParallelism(edgeProp, w); + break; +@@ -352,14 +365,15 @@ public Edge createEdge(JobConf vConf, Vertex v, Vertex w, TezEdgeProperty edgePr + // nothing + } + +- return Edge.create(v, w, createEdgeProperty(edgeProp, vConf)); ++ return Edge.create(v, w, createEdgeProperty(w, edgeProp, vConf, work, tezWork)); + } + + /* + * Helper function to create an edge property from an edge type. 
+ */ +- private EdgeProperty createEdgeProperty(TezEdgeProperty edgeProp, Configuration conf) +- throws IOException { ++ private EdgeProperty createEdgeProperty(Vertex w, TezEdgeProperty edgeProp, ++ Configuration conf, BaseWork work, TezWork tezWork) ++ throws IOException { + MRHelpers.translateMRConfToTez(conf); + String keyClass = conf.get(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS); + String valClass = conf.get(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS); +@@ -412,7 +426,23 @@ private EdgeProperty createEdgeProperty(TezEdgeProperty edgeProp, Configuration + .setValueSerializationClass(TezBytesWritableSerialization.class.getName(), null) + .build(); + return et4Conf.createDefaultOneToOneEdgeProperty(); ++ case XPROD_EDGE: ++ EdgeManagerPluginDescriptor edgeManagerDescriptor = ++ EdgeManagerPluginDescriptor.create(CartesianProductEdgeManager.class.getName()); ++ List crossProductSources = new ArrayList<>(); ++ for (BaseWork parentWork : tezWork.getParents(work)) { ++ if (EdgeType.XPROD_EDGE == tezWork.getEdgeType(parentWork, work)) { ++ crossProductSources.add(parentWork.getName()); ++ } ++ } ++ CartesianProductConfig cpConfig = new CartesianProductConfig(crossProductSources); ++ edgeManagerDescriptor.setUserPayload(cpConfig.toUserPayload(new TezConfiguration(conf))); ++ UnorderedPartitionedKVEdgeConfig cpEdgeConf = ++ UnorderedPartitionedKVEdgeConfig.newBuilder(keyClass, valClass, ++ ValueHashPartitioner.class.getName()).build(); ++ return cpEdgeConf.createDefaultCustomEdgeProperty(edgeManagerDescriptor); + case SIMPLE_EDGE: ++ // fallthrough + default: + assert partitionerClassName != null; + partitionerConf = createPartitionerConf(partitionerClassName, conf); +@@ -427,6 +457,14 @@ private EdgeProperty createEdgeProperty(TezEdgeProperty edgeProp, Configuration + } + } + ++ public static class ValueHashPartitioner implements Partitioner { ++ ++ @Override ++ public int getPartition(Object key, Object value, int numPartitions) { ++ return (value.hashCode() 
& 2147483647) % numPartitions; ++ } ++ } ++ + /** + * Utility method to create a stripped down configuration for the MR partitioner. + * +@@ -1240,6 +1278,21 @@ public Vertex createVertex(JobConf conf, BaseWork work, + } else if (work instanceof MergeJoinWork) { + v = createVertex(conf, (MergeJoinWork) work, appJarLr, additionalLr, fileSystem, scratchDir, + ctx, vertexType); ++ // set VertexManagerPlugin if whether it's a cross product destination vertex ++ List crossProductSources = new ArrayList<>(); ++ for (BaseWork parentWork : tezWork.getParents(work)) { ++ if (tezWork.getEdgeType(parentWork, work) == EdgeType.XPROD_EDGE) { ++ crossProductSources.add(parentWork.getName()); ++ } ++ } ++ ++ if (!crossProductSources.isEmpty()) { ++ CartesianProductConfig cpConfig = new CartesianProductConfig(crossProductSources); ++ v.setVertexManagerPlugin( ++ VertexManagerPluginDescriptor.create(CartesianProductVertexManager.class.getName()) ++ .setUserPayload(cpConfig.toUserPayload(new TezConfiguration(conf)))); ++ // parallelism shouldn't be set for cartesian product vertex ++ } + } else { + // something is seriously wrong if this is happening + throw new HiveException(ErrorMsg.GENERIC_ERROR.getErrorCodedMsg()); +diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java +index 28d91cc..74ceb22 100644 +--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java ++++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java +@@ -461,7 +461,7 @@ DAG build(JobConf conf, TezWork work, Path scratchDir, + for (BaseWork v: children) { + // finally we can create the grouped edge + GroupInputEdge e = utils.createEdge(group, parentConf, +- workToVertex.get(v), work.getEdgeProperty(w, v), work.getVertexType(v)); ++ workToVertex.get(v), work.getEdgeProperty(w, v), v, work); + + dag.addEdge(e); + } +@@ -490,8 +490,7 @@ DAG build(JobConf conf, TezWork work, Path scratchDir, + Edge e = null; + + 
TezEdgeProperty edgeProp = work.getEdgeProperty(w, v); +- +- e = utils.createEdge(wxConf, wx, workToVertex.get(v), edgeProp, work.getVertexType(v)); ++ e = utils.createEdge(wxConf, wx, workToVertex.get(v), edgeProp, v, work); + dag.addEdge(e); + } + } +diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java +index e24760b..3af53ef 100644 +--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java ++++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java +@@ -102,6 +102,14 @@ + MemoryMonitorInfo memoryMonitorInfo = getMemoryMonitorInfo(maxSize, context.conf); + joinOp.getConf().setMemoryMonitorInfo(memoryMonitorInfo); + ++ // not use map join in case of cross product ++ boolean cartesianProductEdgeEnabled = ++ HiveConf.getBoolVar(context.conf, HiveConf.ConfVars.TEZ_CARTESIAN_PRODUCT_EDGE_ENABLED); ++ if (cartesianProductEdgeEnabled && !hasOuterJoin(joinOp) && isCrossProduct(joinOp)) { ++ fallbackToMergeJoin(joinOp, context); ++ return null; ++ } ++ + TezBucketJoinProcCtx tezBucketJoinProcCtx = new TezBucketJoinProcCtx(context.conf); + boolean hiveConvertJoin = context.conf.getBoolVar(HiveConf.ConfVars.HIVECONVERTJOIN) & + !context.parseContext.getDisableMapJoin(); +@@ -572,6 +580,42 @@ private boolean checkColEquality(List> grandParentColNames, + return false; + } + ++ private boolean hasOuterJoin(JoinOperator joinOp) throws SemanticException { ++ boolean hasOuter = false; ++ for (JoinCondDesc joinCondDesc : joinOp.getConf().getConds()) { ++ switch (joinCondDesc.getType()) { ++ case JoinDesc.INNER_JOIN: ++ case JoinDesc.LEFT_SEMI_JOIN: ++ case JoinDesc.UNIQUE_JOIN: ++ hasOuter = false; ++ break; ++ ++ case JoinDesc.FULL_OUTER_JOIN: ++ case JoinDesc.LEFT_OUTER_JOIN: ++ case JoinDesc.RIGHT_OUTER_JOIN: ++ hasOuter = true; ++ break; ++ ++ default: ++ throw new SemanticException("Unknown join type " + joinCondDesc.getType()); ++ } 
++ } ++ return hasOuter; ++ } ++ ++ private boolean isCrossProduct(JoinOperator joinOp) { ++ ExprNodeDesc[][] joinExprs = joinOp.getConf().getJoinKeys(); ++ if (joinExprs != null) { ++ for (ExprNodeDesc[] expr : joinExprs) { ++ if (expr != null && expr.length != 0) { ++ return false; ++ } ++ } ++ } ++ ++ return true; ++ } ++ + /** + * Obtain big table position for join. + * +@@ -597,26 +641,7 @@ public int getMapJoinConversionPos(JoinOperator joinOp, OptimizeTezProcContext c + * case this for now. + */ + if (joinOp.getConf().getConds().length > 1) { +- boolean hasOuter = false; +- for (JoinCondDesc joinCondDesc : joinOp.getConf().getConds()) { +- switch (joinCondDesc.getType()) { +- case JoinDesc.INNER_JOIN: +- case JoinDesc.LEFT_SEMI_JOIN: +- case JoinDesc.UNIQUE_JOIN: +- hasOuter = false; +- break; +- +- case JoinDesc.FULL_OUTER_JOIN: +- case JoinDesc.LEFT_OUTER_JOIN: +- case JoinDesc.RIGHT_OUTER_JOIN: +- hasOuter = true; +- break; +- +- default: +- throw new SemanticException("Unknown join type " + joinCondDesc.getType()); +- } +- } +- if (hasOuter) { ++ if (hasOuterJoin(joinOp)) { + return -1; + } + } +@@ -1058,14 +1083,19 @@ private void fallbackToReduceSideJoin(JoinOperator joinOp, OptimizeTezProcContex + } + } + ++ // we are just converting to a common merge join operator. The shuffle ++ // join in map-reduce case. ++ fallbackToMergeJoin(joinOp, context); ++ } ++ ++ private void fallbackToMergeJoin(JoinOperator joinOp, OptimizeTezProcContext context) ++ throws SemanticException { + int pos = getMapJoinConversionPos(joinOp, context, estimateNumBuckets(joinOp, false), + true, Long.MAX_VALUE, false); + if (pos < 0) { + LOG.info("Could not get a valid join position. Defaulting to position 0"); + pos = 0; + } +- // we are just converting to a common merge join operator. The shuffle +- // join in map-reduce case. 
+ LOG.info("Fallback to common merge join operator"); + convertJoinSMBJoin(joinOp, context, pos, 0, false); + } +diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CrossProductCheck.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CrossProductCheck.java +deleted file mode 100644 +index f5abaf1..0000000 +--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CrossProductCheck.java ++++ /dev/null +@@ -1,368 +0,0 @@ +-/** +- * Licensed to the Apache Software Foundation (ASF) under one +- * or more contributor license agreements. See the NOTICE file +- * distributed with this work for additional information +- * regarding copyright ownership. The ASF licenses this file +- * to you under the Apache License, Version 2.0 (the +- * "License"); you may not use this file except in compliance +- * with the License. You may obtain a copy of the License at +- * +- * http://www.apache.org/licenses/LICENSE-2.0 +- * +- * Unless required by applicable law or agreed to in writing, software +- * distributed under the License is distributed on an "AS IS" BASIS, +- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- * See the License for the specific language governing permissions and +- * limitations under the License. 
+- */ +- +-package org.apache.hadoop.hive.ql.optimizer.physical; +- +-import java.io.Serializable; +-import java.util.ArrayList; +-import java.util.Arrays; +-import java.util.HashMap; +-import java.util.Iterator; +-import java.util.LinkedHashMap; +-import java.util.List; +-import java.util.Map; +-import java.util.Stack; +- +-import org.slf4j.Logger; +-import org.slf4j.LoggerFactory; +-import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator; +-import org.apache.hadoop.hive.ql.exec.ConditionalTask; +-import org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator; +-import org.apache.hadoop.hive.ql.exec.JoinOperator; +-import org.apache.hadoop.hive.ql.exec.MapJoinOperator; +-import org.apache.hadoop.hive.ql.exec.Operator; +-import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; +-import org.apache.hadoop.hive.ql.exec.TableScanOperator; +-import org.apache.hadoop.hive.ql.exec.Task; +-import org.apache.hadoop.hive.ql.exec.mr.MapRedTask; +-import org.apache.hadoop.hive.ql.exec.tez.TezTask; +-import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; +-import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; +-import org.apache.hadoop.hive.ql.lib.Dispatcher; +-import org.apache.hadoop.hive.ql.lib.GraphWalker; +-import org.apache.hadoop.hive.ql.lib.Node; +-import org.apache.hadoop.hive.ql.lib.NodeProcessor; +-import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +-import org.apache.hadoop.hive.ql.lib.Rule; +-import org.apache.hadoop.hive.ql.lib.RuleRegExp; +-import org.apache.hadoop.hive.ql.lib.TaskGraphWalker; +-import org.apache.hadoop.hive.ql.parse.SemanticException; +-import org.apache.hadoop.hive.ql.plan.BaseWork; +-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +-import org.apache.hadoop.hive.ql.plan.MapJoinDesc; +-import org.apache.hadoop.hive.ql.plan.MapWork; +-import org.apache.hadoop.hive.ql.plan.MapredWork; +-import org.apache.hadoop.hive.ql.plan.MergeJoinWork; +-import org.apache.hadoop.hive.ql.plan.OperatorDesc; +-import 
org.apache.hadoop.hive.ql.plan.ReduceSinkDesc; +-import org.apache.hadoop.hive.ql.plan.ReduceWork; +-import org.apache.hadoop.hive.ql.plan.TableScanDesc; +-import org.apache.hadoop.hive.ql.plan.TezWork; +-import org.apache.hadoop.hive.ql.session.SessionState; +- +-/* +- * Check each MapJoin and ShuffleJoin Operator to see they are performing a cross product. +- * If yes, output a warning to the Session's console. +- * The Checks made are the following: +- * 1. MR, Shuffle Join: +- * Check the parent ReduceSinkOp of the JoinOp. If its keys list is size = 0, then +- * this is a cross product. +- * The parent ReduceSinkOp is in the MapWork for the same Stage. +- * 2. MR, MapJoin: +- * If the keys expr list on the mapJoin Desc is an empty list for any input, +- * this implies a cross product. +- * 3. Tez, Shuffle Join: +- * Check the parent ReduceSinkOp of the JoinOp. If its keys list is size = 0, then +- * this is a cross product. +- * The parent ReduceSinkOp checked is based on the ReduceWork.tagToInput map on the +- * reduceWork that contains the JoinOp. +- * 4. Tez, Map Join: +- * If the keys expr list on the mapJoin Desc is an empty list for any input, +- * this implies a cross product. +- */ +-public class CrossProductCheck implements PhysicalPlanResolver, Dispatcher { +- +- protected static transient final Logger LOG = LoggerFactory +- .getLogger(CrossProductCheck.class); +- +- @Override +- public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException { +- TaskGraphWalker ogw = new TaskGraphWalker(this); +- +- ArrayList topNodes = new ArrayList(); +- topNodes.addAll(pctx.getRootTasks()); +- +- ogw.startWalking(topNodes, null); +- return pctx; +- } +- +- @Override +- public Object dispatch(Node nd, Stack stack, Object... 
nodeOutputs) +- throws SemanticException { +- @SuppressWarnings("unchecked") +- Task currTask = (Task) nd; +- if (currTask instanceof MapRedTask) { +- MapRedTask mrTsk = (MapRedTask)currTask; +- MapredWork mrWrk = mrTsk.getWork(); +- checkMapJoins(mrTsk); +- checkMRReducer(currTask.toString(), mrWrk); +- } else if (currTask instanceof ConditionalTask ) { +- List> taskListInConditionalTask = +- ((ConditionalTask) currTask).getListTasks(); +- for(Task tsk: taskListInConditionalTask){ +- dispatch(tsk, stack, nodeOutputs); +- } +- +- } else if (currTask instanceof TezTask) { +- TezTask tzTask = (TezTask) currTask; +- TezWork tzWrk = tzTask.getWork(); +- checkMapJoins(tzWrk); +- checkTezReducer(tzWrk); +- } +- return null; +- } +- +- private void warn(String msg) { +- SessionState.getConsole().printInfo("Warning: " + msg, false); +- } +- +- private void checkMapJoins(MapRedTask mrTsk) throws SemanticException { +- MapredWork mrWrk = mrTsk.getWork(); +- MapWork mapWork = mrWrk.getMapWork(); +- List warnings = new MapJoinCheck(mrTsk.toString()).analyze(mapWork); +- if (!warnings.isEmpty()) { +- for (String w : warnings) { +- warn(w); +- } +- } +- ReduceWork redWork = mrWrk.getReduceWork(); +- if (redWork != null) { +- warnings = new MapJoinCheck(mrTsk.toString()).analyze(redWork); +- if (!warnings.isEmpty()) { +- for (String w : warnings) { +- warn(w); +- } +- } +- } +- } +- +- private void checkMapJoins(TezWork tzWrk) throws SemanticException { +- for(BaseWork wrk : tzWrk.getAllWork() ) { +- +- if ( wrk instanceof MergeJoinWork ) { +- wrk = ((MergeJoinWork)wrk).getMainWork(); +- } +- +- List warnings = new MapJoinCheck(wrk.getName()).analyze(wrk); +- if ( !warnings.isEmpty() ) { +- for(String w : warnings) { +- warn(w); +- } +- } +- } +- } +- +- private void checkTezReducer(TezWork tzWrk) throws SemanticException { +- for(BaseWork wrk : tzWrk.getAllWork() ) { +- +- if ( wrk instanceof MergeJoinWork ) { +- wrk = ((MergeJoinWork)wrk).getMainWork(); +- } +- +- if ( !(wrk 
instanceof ReduceWork ) ) { +- continue; +- } +- ReduceWork rWork = (ReduceWork) wrk; +- Operator reducer = ((ReduceWork)wrk).getReducer(); +- if ( reducer instanceof JoinOperator || reducer instanceof CommonMergeJoinOperator ) { +- Map rsInfo = +- new HashMap(); +- for(Map.Entry e : rWork.getTagToInput().entrySet()) { +- rsInfo.putAll(getReducerInfo(tzWrk, rWork.getName(), e.getValue())); +- } +- checkForCrossProduct(rWork.getName(), reducer, rsInfo); +- } +- } +- } +- +- private void checkMRReducer(String taskName, MapredWork mrWrk) throws SemanticException { +- ReduceWork rWrk = mrWrk.getReduceWork(); +- if ( rWrk == null) { +- return; +- } +- Operator reducer = rWrk.getReducer(); +- if ( reducer instanceof JoinOperator|| reducer instanceof CommonMergeJoinOperator ) { +- BaseWork prntWork = mrWrk.getMapWork(); +- checkForCrossProduct(taskName, reducer, +- new ExtractReduceSinkInfo(null).analyze(prntWork)); +- } +- } +- +- private void checkForCrossProduct(String taskName, +- Operator reducer, +- Map rsInfo) { +- if ( rsInfo.isEmpty() ) { +- return; +- } +- Iterator it = rsInfo.values().iterator(); +- ExtractReduceSinkInfo.Info info = it.next(); +- if (info.keyCols.size() == 0) { +- List iAliases = new ArrayList(); +- iAliases.addAll(info.inputAliases); +- while (it.hasNext()) { +- info = it.next(); +- iAliases.addAll(info.inputAliases); +- } +- String warning = String.format( +- "Shuffle Join %s[tables = %s] in Stage '%s' is a cross product", +- reducer.toString(), +- iAliases, +- taskName); +- warn(warning); +- } +- } +- +- private Map getReducerInfo(TezWork tzWrk, String vertex, String prntVertex) +- throws SemanticException { +- BaseWork prntWork = tzWrk.getWorkMap().get(prntVertex); +- return new ExtractReduceSinkInfo(vertex).analyze(prntWork); +- } +- +- /* +- * Given a Work descriptor and the TaskName for the work +- * this is responsible to check each MapJoinOp for cross products. +- * The analyze call returns the warnings list. +- *

+- * For MR the taskname is the StageName, for Tez it is the vertex name. +- */ +- public static class MapJoinCheck implements NodeProcessor, NodeProcessorCtx { +- +- final List warnings; +- final String taskName; +- +- MapJoinCheck(String taskName) { +- this.taskName = taskName; +- warnings = new ArrayList(); +- } +- +- List analyze(BaseWork work) throws SemanticException { +- Map opRules = new LinkedHashMap(); +- opRules.put(new RuleRegExp("R1", MapJoinOperator.getOperatorName() +- + "%"), this); +- Dispatcher disp = new DefaultRuleDispatcher(new NoopProcessor(), opRules, this); +- GraphWalker ogw = new DefaultGraphWalker(disp); +- ArrayList topNodes = new ArrayList(); +- topNodes.addAll(work.getAllRootOperators()); +- ogw.startWalking(topNodes, null); +- return warnings; +- } +- +- @Override +- public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, +- Object... nodeOutputs) throws SemanticException { +- @SuppressWarnings("unchecked") +- AbstractMapJoinOperator mjOp = (AbstractMapJoinOperator) nd; +- MapJoinDesc mjDesc = mjOp.getConf(); +- +- String bigTablAlias = mjDesc.getBigTableAlias(); +- if ( bigTablAlias == null ) { +- Operator parent = null; +- for(Operator op : mjOp.getParentOperators() ) { +- if ( op instanceof TableScanOperator ) { +- parent = op; +- } +- } +- if ( parent != null) { +- TableScanDesc tDesc = ((TableScanOperator)parent).getConf(); +- bigTablAlias = tDesc.getAlias(); +- } +- } +- bigTablAlias = bigTablAlias == null ? "?" : bigTablAlias; +- +- List joinExprs = mjDesc.getKeys().values().iterator().next(); +- +- if ( joinExprs.size() == 0 ) { +- warnings.add( +- String.format("Map Join %s[bigTable=%s] in task '%s' is a cross product", +- mjOp.toString(), bigTablAlias, taskName)); +- } +- +- return null; +- } +- } +- +- /* +- * for a given Work Descriptor, it extracts information about the ReduceSinkOps +- * in the Work. For Tez, you can restrict it to ReduceSinks for a particular output +- * vertex. 
+- */ +- public static class ExtractReduceSinkInfo implements NodeProcessor, NodeProcessorCtx { +- +- static class Info { +- List keyCols; +- List inputAliases; +- +- Info(List keyCols, List inputAliases) { +- this.keyCols = keyCols; +- this.inputAliases = inputAliases == null ? new ArrayList() : inputAliases; +- } +- +- Info(List keyCols, String[] inputAliases) { +- this.keyCols = keyCols; +- this.inputAliases = inputAliases == null ? new ArrayList() : Arrays.asList(inputAliases); +- } +- } +- +- final String outputTaskName; +- final Map reduceSinkInfo; +- +- ExtractReduceSinkInfo(String parentTaskName) { +- this.outputTaskName = parentTaskName; +- reduceSinkInfo = new HashMap(); +- } +- +- Map analyze(BaseWork work) throws SemanticException { +- Map opRules = new LinkedHashMap(); +- opRules.put(new RuleRegExp("R1", ReduceSinkOperator.getOperatorName() +- + "%"), this); +- Dispatcher disp = new DefaultRuleDispatcher(new NoopProcessor(), opRules, this); +- GraphWalker ogw = new DefaultGraphWalker(disp); +- ArrayList topNodes = new ArrayList(); +- topNodes.addAll(work.getAllRootOperators()); +- ogw.startWalking(topNodes, null); +- return reduceSinkInfo; +- } +- +- @Override +- public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, +- Object... nodeOutputs) throws SemanticException { +- ReduceSinkOperator rsOp = (ReduceSinkOperator) nd; +- ReduceSinkDesc rsDesc = rsOp.getConf(); +- +- if ( outputTaskName != null ) { +- String rOutputName = rsDesc.getOutputName(); +- if ( rOutputName == null || !outputTaskName.equals(rOutputName)) { +- return null; +- } +- } +- +- reduceSinkInfo.put(rsDesc.getTag(), +- new Info(rsDesc.getKeyCols(), rsOp.getInputAliases())); +- +- return null; +- } +- } +- +- static class NoopProcessor implements NodeProcessor { +- @Override +- public final Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, +- Object... 
nodeOutputs) throws SemanticException { +- return nd; +- } +- } +-} +diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CrossProductHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CrossProductHandler.java +new file mode 100644 +index 0000000..93367d9 +--- /dev/null ++++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CrossProductHandler.java +@@ -0,0 +1,382 @@ ++/** ++ * Licensed to the Apache Software Foundation (ASF) under one ++ * or more contributor license agreements. See the NOTICE file ++ * distributed with this work for additional information ++ * regarding copyright ownership. The ASF licenses this file ++ * to you under the Apache License, Version 2.0 (the ++ * "License"); you may not use this file except in compliance ++ * with the License. You may obtain a copy of the License at ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. 
++ */ ++ ++package org.apache.hadoop.hive.ql.optimizer.physical; ++ ++import java.io.Serializable; ++import java.util.ArrayList; ++import java.util.Arrays; ++import java.util.HashMap; ++import java.util.Iterator; ++import java.util.LinkedHashMap; ++import java.util.List; ++import java.util.Map; ++import java.util.Stack; ++ ++import org.apache.hadoop.hive.conf.HiveConf; ++import org.apache.hadoop.hive.ql.plan.*; ++import org.slf4j.Logger; ++import org.slf4j.LoggerFactory; ++import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator; ++import org.apache.hadoop.hive.ql.exec.ConditionalTask; ++import org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator; ++import org.apache.hadoop.hive.ql.exec.JoinOperator; ++import org.apache.hadoop.hive.ql.exec.MapJoinOperator; ++import org.apache.hadoop.hive.ql.exec.Operator; ++import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; ++import org.apache.hadoop.hive.ql.exec.TableScanOperator; ++import org.apache.hadoop.hive.ql.exec.Task; ++import org.apache.hadoop.hive.ql.exec.mr.MapRedTask; ++import org.apache.hadoop.hive.ql.exec.tez.TezTask; ++import org.apache.hadoop.hive.ql.plan.TezEdgeProperty.EdgeType; ++import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; ++import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; ++import org.apache.hadoop.hive.ql.lib.Dispatcher; ++import org.apache.hadoop.hive.ql.lib.GraphWalker; ++import org.apache.hadoop.hive.ql.lib.Node; ++import org.apache.hadoop.hive.ql.lib.NodeProcessor; ++import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; ++import org.apache.hadoop.hive.ql.lib.Rule; ++import org.apache.hadoop.hive.ql.lib.RuleRegExp; ++import org.apache.hadoop.hive.ql.lib.TaskGraphWalker; ++import org.apache.hadoop.hive.ql.parse.SemanticException; ++import org.apache.hadoop.hive.ql.session.SessionState; ++ ++/* ++ * Check each MapJoin and ShuffleJoin Operator to see if they are performing a cross product. ++ * If yes, warn on the Session's console (on Tez the edges may also become XPROD_EDGE). 
++ * The Checks made are the following: ++ * 1. MR, Shuffle Join: ++ * Check the parent ReduceSinkOp of the JoinOp. If its keys list is size = 0, then ++ * this is a cross product. ++ * The parent ReduceSinkOp is in the MapWork for the same Stage. ++ * 2. MR, MapJoin: ++ * If the keys expr list on the mapJoin Desc is an empty list for any input, ++ * this implies a cross product. ++ * 3. Tez, Shuffle Join: ++ * Check the parent ReduceSinkOp of the JoinOp. If its keys list is size = 0, then ++ * this is a cross product. ++ * The parent ReduceSinkOp checked is based on the ReduceWork.tagToInput map on the ++ * reduceWork that contains the JoinOp. ++ * 4. Tez, Map Join: ++ * If the keys expr list on the mapJoin Desc is an empty list for any input, ++ * this implies a cross product. ++ */ ++public class CrossProductHandler implements PhysicalPlanResolver, Dispatcher { ++ ++ protected static transient final Logger LOG = LoggerFactory ++ .getLogger(CrossProductHandler.class); ++ private Boolean cartesianProductEdgeEnabled = null; ++ ++ @Override ++ public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException { ++ cartesianProductEdgeEnabled = ++ HiveConf.getBoolVar(pctx.getConf(), HiveConf.ConfVars.TEZ_CARTESIAN_PRODUCT_EDGE_ENABLED); ++ TaskGraphWalker ogw = new TaskGraphWalker(this); ++ ++ ArrayList topNodes = new ArrayList(); ++ topNodes.addAll(pctx.getRootTasks()); ++ ++ ogw.startWalking(topNodes, null); ++ return pctx; ++ } ++ ++ @Override ++ public Object dispatch(Node nd, Stack stack, Object... 
nodeOutputs) ++ throws SemanticException { ++ @SuppressWarnings("unchecked") ++ Task currTask = (Task) nd; ++ if (currTask instanceof MapRedTask) { ++ MapRedTask mrTsk = (MapRedTask)currTask; ++ MapredWork mrWrk = mrTsk.getWork(); ++ checkMapJoins(mrTsk); ++ checkMRReducer(currTask.toString(), mrWrk); ++ } else if (currTask instanceof ConditionalTask ) { ++ List> taskListInConditionalTask = ++ ((ConditionalTask) currTask).getListTasks(); ++ for(Task tsk: taskListInConditionalTask){ ++ dispatch(tsk, stack, nodeOutputs); ++ } ++ ++ } else if (currTask instanceof TezTask) { ++ TezTask tezTask = (TezTask) currTask; ++ TezWork tezWork = tezTask.getWork(); ++ checkMapJoins(tezWork); ++ checkTezReducer(tezWork); ++ } ++ return null; ++ } ++ ++ private void warn(String msg) { ++ SessionState.getConsole().printInfo("Warning: " + msg, false); ++ } ++ ++ private void checkMapJoins(MapRedTask mrTsk) throws SemanticException { ++ MapredWork mrWrk = mrTsk.getWork(); ++ MapWork mapWork = mrWrk.getMapWork(); ++ List warnings = new MapJoinCheck(mrTsk.toString()).analyze(mapWork); ++ if (!warnings.isEmpty()) { ++ for (String w : warnings) { ++ warn(w); ++ } ++ } ++ ReduceWork redWork = mrWrk.getReduceWork(); ++ if (redWork != null) { ++ warnings = new MapJoinCheck(mrTsk.toString()).analyze(redWork); ++ if (!warnings.isEmpty()) { ++ for (String w : warnings) { ++ warn(w); ++ } ++ } ++ } ++ } ++ ++ private void checkMapJoins(TezWork tezWork) throws SemanticException { ++ for(BaseWork wrk : tezWork.getAllWork() ) { ++ ++ if ( wrk instanceof MergeJoinWork ) { ++ wrk = ((MergeJoinWork)wrk).getMainWork(); ++ } ++ ++ List warnings = new MapJoinCheck(wrk.getName()).analyze(wrk); ++ if ( !warnings.isEmpty() ) { ++ for(String w : warnings) { ++ warn(w); ++ } ++ } ++ } ++ } ++ ++ private void checkTezReducer(TezWork tezWork) throws SemanticException { ++ for(BaseWork wrk : tezWork.getAllWork() ) { ++ BaseWork origWrk = null; ++ ++ if ( wrk instanceof MergeJoinWork ) { ++ origWrk = wrk; ++ wrk 
= ((MergeJoinWork)wrk).getMainWork(); ++ } ++ ++ if ( !(wrk instanceof ReduceWork ) ) { ++ continue; ++ } ++ ReduceWork rWork = (ReduceWork) wrk; ++ Operator reducer = ((ReduceWork)wrk).getReducer(); ++ if ( reducer instanceof JoinOperator || reducer instanceof CommonMergeJoinOperator ) { ++ boolean noOuterJoin = ((JoinDesc)reducer.getConf()).isNoOuterJoin(); ++ Map rsInfo = ++ new HashMap(); ++ for(Map.Entry e : rWork.getTagToInput().entrySet()) { ++ rsInfo.putAll(getReducerInfo(tezWork, rWork.getName(), e.getValue())); ++ } ++ if (checkForCrossProduct(rWork.getName(), reducer, rsInfo) ++ && cartesianProductEdgeEnabled && noOuterJoin) { ++ List parents = tezWork.getParents(null == origWrk ? wrk : origWrk); ++ for (BaseWork p: parents) { ++ TezEdgeProperty prop = tezWork.getEdgeProperty(p, null == origWrk ? wrk : origWrk); ++ LOG.info("Edge Type: "+prop.getEdgeType()); ++ if (prop.getEdgeType().equals(EdgeType.CUSTOM_SIMPLE_EDGE) ++ || prop.getEdgeType().equals(EdgeType.CUSTOM_EDGE)) { ++ prop.setEdgeType(EdgeType.XPROD_EDGE); ++ rWork.setNumReduceTasks(-1); ++ rWork.setMaxReduceTasks(-1); ++ rWork.setMinReduceTasks(-1); ++ } ++ } ++ } ++ } ++ } ++ } ++ ++ private void checkMRReducer(String taskName, MapredWork mrWrk) throws SemanticException { ++ ReduceWork rWrk = mrWrk.getReduceWork(); ++ if ( rWrk == null) { ++ return; ++ } ++ Operator reducer = rWrk.getReducer(); ++ if ( reducer instanceof JoinOperator|| reducer instanceof CommonMergeJoinOperator ) { ++ BaseWork parentWork = mrWrk.getMapWork(); ++ checkForCrossProduct(taskName, reducer, ++ new ExtractReduceSinkInfo(null).analyze(parentWork)); ++ } ++ } ++ ++ private boolean checkForCrossProduct(String taskName, ++ Operator reducer, ++ Map rsInfo) { ++ if ( rsInfo.isEmpty() ) { ++ return false; ++ } ++ Iterator it = rsInfo.values().iterator(); ++ ExtractReduceSinkInfo.Info info = it.next(); ++ if (info.keyCols.size() == 0) { ++ List iAliases = new ArrayList(); ++ iAliases.addAll(info.inputAliases); ++ while 
(it.hasNext()) { ++ info = it.next(); ++ iAliases.addAll(info.inputAliases); ++ } ++ String warning = String.format( ++ "Shuffle Join %s[tables = %s] in Stage '%s' is a cross product", ++ reducer.toString(), ++ iAliases, ++ taskName); ++ warn(warning); ++ return true; ++ } ++ return false; ++ } ++ ++ private Map getReducerInfo(TezWork tezWork, String vertex, String prntVertex) ++ throws SemanticException { ++ BaseWork parentWork = tezWork.getWorkMap().get(prntVertex); ++ return new ExtractReduceSinkInfo(vertex).analyze(parentWork); ++ } ++ ++ /* ++ * Given a Work descriptor and the TaskName for the work, ++ * this class is responsible for checking each MapJoinOp for cross products. ++ * The analyze call returns the warnings list. ++ * <p>

++ * For MR the taskname is the StageName, for Tez it is the vertex name. ++ */ ++ public static class MapJoinCheck implements NodeProcessor, NodeProcessorCtx { ++ ++ final List warnings; ++ final String taskName; ++ ++ MapJoinCheck(String taskName) { ++ this.taskName = taskName; ++ warnings = new ArrayList(); ++ } ++ ++ List analyze(BaseWork work) throws SemanticException { ++ Map opRules = new LinkedHashMap(); ++ opRules.put(new RuleRegExp("R1", MapJoinOperator.getOperatorName() ++ + "%"), this); ++ Dispatcher disp = new DefaultRuleDispatcher(new NoopProcessor(), opRules, this); ++ GraphWalker ogw = new DefaultGraphWalker(disp); ++ ArrayList topNodes = new ArrayList(); ++ topNodes.addAll(work.getAllRootOperators()); ++ ogw.startWalking(topNodes, null); ++ return warnings; ++ } ++ ++ @Override ++ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, ++ Object... nodeOutputs) throws SemanticException { ++ @SuppressWarnings("unchecked") ++ AbstractMapJoinOperator mjOp = (AbstractMapJoinOperator) nd; ++ MapJoinDesc mjDesc = mjOp.getConf(); ++ ++ String bigTablAlias = mjDesc.getBigTableAlias(); ++ if ( bigTablAlias == null ) { ++ Operator parent = null; ++ for(Operator op : mjOp.getParentOperators() ) { ++ if ( op instanceof TableScanOperator ) { ++ parent = op; ++ } ++ } ++ if ( parent != null) { ++ TableScanDesc tDesc = ((TableScanOperator)parent).getConf(); ++ bigTablAlias = tDesc.getAlias(); ++ } ++ } ++ bigTablAlias = bigTablAlias == null ? "?" : bigTablAlias; ++ ++ List joinExprs = mjDesc.getKeys().values().iterator().next(); ++ ++ if ( joinExprs.size() == 0 ) { ++ warnings.add( ++ String.format("Map Join %s[bigTable=%s] in task '%s' is a cross product", ++ mjOp.toString(), bigTablAlias, taskName)); ++ } ++ ++ return null; ++ } ++ } ++ ++ /* ++ * for a given Work Descriptor, it extracts information about the ReduceSinkOps ++ * in the Work. For Tez, you can restrict it to ReduceSinks for a particular output ++ * vertex. 
++ */ ++ public static class ExtractReduceSinkInfo implements NodeProcessor, NodeProcessorCtx { ++ ++ static class Info { ++ List keyCols; ++ List inputAliases; ++ ++ Info(List keyCols, List inputAliases) { ++ this.keyCols = keyCols; ++ this.inputAliases = inputAliases == null ? new ArrayList() : inputAliases; ++ } ++ ++ Info(List keyCols, String[] inputAliases) { ++ this.keyCols = keyCols; ++ this.inputAliases = inputAliases == null ? new ArrayList() : Arrays.asList(inputAliases); ++ } ++ } ++ ++ final String outputTaskName; ++ final Map reduceSinkInfo; ++ ++ ExtractReduceSinkInfo(String parentTaskName) { ++ this.outputTaskName = parentTaskName; ++ reduceSinkInfo = new HashMap(); ++ } ++ ++ Map analyze(BaseWork work) throws SemanticException { ++ Map opRules = new LinkedHashMap(); ++ opRules.put(new RuleRegExp("R1", ReduceSinkOperator.getOperatorName() ++ + "%"), this); ++ Dispatcher disp = new DefaultRuleDispatcher(new NoopProcessor(), opRules, this); ++ GraphWalker ogw = new DefaultGraphWalker(disp); ++ ArrayList topNodes = new ArrayList(); ++ topNodes.addAll(work.getAllRootOperators()); ++ ogw.startWalking(topNodes, null); ++ return reduceSinkInfo; ++ } ++ ++ @Override ++ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, ++ Object... nodeOutputs) throws SemanticException { ++ ReduceSinkOperator rsOp = (ReduceSinkOperator) nd; ++ ReduceSinkDesc rsDesc = rsOp.getConf(); ++ ++ if ( outputTaskName != null ) { ++ String rOutputName = rsDesc.getOutputName(); ++ if ( rOutputName == null || !outputTaskName.equals(rOutputName)) { ++ return null; ++ } ++ } ++ ++ reduceSinkInfo.put(rsDesc.getTag(), ++ new Info(rsDesc.getKeyCols(), rsOp.getInputAliases())); ++ ++ return null; ++ } ++ } ++ ++ static class NoopProcessor implements NodeProcessor { ++ @Override ++ public final Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, ++ Object... 
nodeOutputs) throws SemanticException { ++ return nd; ++ } ++ } ++} +diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java +index 9377563..c040406 100644 +--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java ++++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java +@@ -82,7 +82,7 @@ private void initialize(HiveConf hiveConf) { + } + + if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_CHECK_CROSS_PRODUCT)) { +- resolvers.add(new CrossProductCheck()); ++ resolvers.add(new CrossProductHandler()); + } + + // Vectorization should be the last optimization, because it doesn't modify the plan +diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SparkCrossProductCheck.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SparkCrossProductCheck.java +index f904451..cdb3bc8 100644 +--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SparkCrossProductCheck.java ++++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SparkCrossProductCheck.java +@@ -92,10 +92,10 @@ private void checkShuffleJoin(SparkWork sparkWork) throws SemanticException { + for (ReduceWork reduceWork : sparkWork.getAllReduceWork()) { + Operator reducer = reduceWork.getReducer(); + if (reducer instanceof JoinOperator || reducer instanceof CommonMergeJoinOperator) { +- Map rsInfo = +- new HashMap(); ++ Map rsInfo = ++ new HashMap(); + for (BaseWork parent : sparkWork.getParents(reduceWork)) { +- rsInfo.putAll(new CrossProductCheck.ExtractReduceSinkInfo(null).analyze(parent)); ++ rsInfo.putAll(new CrossProductHandler.ExtractReduceSinkInfo(null).analyze(parent)); + } + checkForCrossProduct(reduceWork.getName(), reducer, rsInfo); + } +@@ -106,7 +106,7 @@ private void checkMapJoin(SparkTask sparkTask) throws SemanticException { + SparkWork sparkWork = sparkTask.getWork(); + for (BaseWork 
baseWork : sparkWork.getAllWork()) { + List warnings = +- new CrossProductCheck.MapJoinCheck(sparkTask.toString()).analyze(baseWork); ++ new CrossProductHandler.MapJoinCheck(sparkTask.toString()).analyze(baseWork); + for (String w : warnings) { + warn(w); + } +@@ -115,12 +115,12 @@ private void checkMapJoin(SparkTask sparkTask) throws SemanticException { + + private void checkForCrossProduct(String workName, + Operator reducer, +- Map rsInfo) { ++ Map rsInfo) { + if (rsInfo.isEmpty()) { + return; + } +- Iterator it = rsInfo.values().iterator(); +- CrossProductCheck.ExtractReduceSinkInfo.Info info = it.next(); ++ Iterator it = rsInfo.values().iterator(); ++ CrossProductHandler.ExtractReduceSinkInfo.Info info = it.next(); + if (info.keyCols.size() == 0) { + List iAliases = new ArrayList(); + iAliases.addAll(info.inputAliases); +diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java +index 15836ec..da30c3b 100644 +--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java ++++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java +@@ -80,7 +80,7 @@ + import org.apache.hadoop.hive.ql.optimizer.correlation.ReduceSinkJoinDeDuplication; + import org.apache.hadoop.hive.ql.optimizer.metainfo.annotation.AnnotateWithOpTraits; + import org.apache.hadoop.hive.ql.optimizer.physical.AnnotateRunTimeStatsOptimizer; +-import org.apache.hadoop.hive.ql.optimizer.physical.CrossProductCheck; ++import org.apache.hadoop.hive.ql.optimizer.physical.CrossProductHandler; + import org.apache.hadoop.hive.ql.optimizer.physical.LlapClusterStateForCompile; + import org.apache.hadoop.hive.ql.optimizer.physical.LlapDecider; + import org.apache.hadoop.hive.ql.optimizer.physical.LlapPreVectorizationPass; +@@ -658,7 +658,7 @@ protected void optimizeTaskPlan(List> rootTasks, Pa + } + + if (conf.getBoolVar(HiveConf.ConfVars.HIVE_CHECK_CROSS_PRODUCT)) { +- physicalCtx = new 
CrossProductCheck().resolve(physicalCtx); ++ physicalCtx = new CrossProductHandler().resolve(physicalCtx); + } else { + LOG.debug("Skipping cross product analysis"); + } +diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/TezEdgeProperty.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/TezEdgeProperty.java +index bbed9be..d43b81a 100644 +--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/TezEdgeProperty.java ++++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/TezEdgeProperty.java +@@ -28,7 +28,8 @@ + CONTAINS,//used for union (all?) + CUSTOM_EDGE,//CO_PARTITION_EDGE + CUSTOM_SIMPLE_EDGE,//PARTITION_EDGE +- ONE_TO_ONE_EDGE ++ ONE_TO_ONE_EDGE, ++ XPROD_EDGE + } + + private HiveConf hiveConf; +@@ -107,4 +108,5 @@ public void setSlowStart(boolean slowStart) { + public void setEdgeType(EdgeType type) { + this.edgeType = type; + } ++ + } +diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezTask.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezTask.java +index 2dc334d..47aa936 100644 +--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezTask.java ++++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezTask.java +@@ -109,8 +109,8 @@ public Vertex answer(InvocationOnMock invocation) throws Throwable { + }); + + when(utils.createEdge(any(JobConf.class), any(Vertex.class), any(Vertex.class), +- any(TezEdgeProperty.class), any(VertexType.class))).thenAnswer(new Answer() { +- ++ any(TezEdgeProperty.class), any(BaseWork.class), any(TezWork.class))) ++ .thenAnswer(new Answer() { + @Override + public Edge answer(InvocationOnMock invocation) throws Throwable { + Object[] args = invocation.getArguments(); +diff --git a/ql/src/test/queries/clientpositive/cross_prod_1.q b/ql/src/test/queries/clientpositive/cross_prod_1.q +new file mode 100644 +index 0000000..b5a84ea +--- /dev/null ++++ b/ql/src/test/queries/clientpositive/cross_prod_1.q +@@ -0,0 +1,34 @@ ++set hive.mapred.mode=nonstrict; ++set hive.explain.user=false; ++set 
hive.tez.cartesian-product.enabled=true; ++ ++create table X as ++select distinct * from src order by key limit 10; ++ ++explain select * from X as A, X as B order by A.key, B.key; ++select * from X as A, X as B order by A.key, B.key; ++ ++explain select * from X as A join X as B on A.key 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value + PREHOOK: type: QUERY + PREHOOK: Input: default@myinput1 +@@ -300,7 +300,7 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in2.txt' into table sm + POSTHOOK: type: LOAD + #### A masked pattern was here #### + POSTHOOK: Output: default@smb_input2 +-Warning: Map Join MAPJOIN[18][bigTable=?] in task 'Map 1' is a cross product ++Warning: Shuffle Join MERGEJOIN[18][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product + PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value + PREHOOK: type: QUERY + PREHOOK: Input: default@myinput1 +diff --git a/ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out b/ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out +index 5984e8f..04da1f2 100644 +--- a/ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out ++++ b/ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out +@@ -14,7 +14,7 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in1.txt' INTO TABLE my + POSTHOOK: type: LOAD + #### A masked pattern was here #### + POSTHOOK: Output: default@myinput1 +-Warning: Map Join MAPJOIN[14][bigTable=?] 
in task 'Map 1' is a cross product ++Warning: Shuffle Join MERGEJOIN[14][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product + PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b + PREHOOK: type: QUERY + PREHOOK: Input: default@myinput1 +diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_12.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_12.q.out +index 6ef1f34..3acbb20 100644 +--- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_12.q.out ++++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_12.q.out +@@ -134,7 +134,7 @@ POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket3out + POSTHOOK: type: LOAD + #### A masked pattern was here #### + POSTHOOK: Output: default@bucket_medium@ds=2008-04-08 +-Warning: Map Join MAPJOIN[34][bigTable=?] in task 'Map 3' is a cross product ++Warning: Shuffle Join MERGEJOIN[34][tables = [$hdt$_1, $hdt$_2, $hdt$_0, $hdt$_3]] in Stage 'Reducer 4' is a cross product + PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key + PREHOOK: type: QUERY + POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key +@@ -148,8 +148,9 @@ STAGE PLANS: + Tez + #### A masked pattern was here #### + Edges: +- Map 3 <- Map 1 (BROADCAST_EDGE), Map 2 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE) +- Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) ++ Map 3 <- Map 1 (BROADCAST_EDGE), Map 2 (BROADCAST_EDGE) ++ Reducer 4 <- Map 3 (XPROD_EDGE), Map 6 (XPROD_EDGE) ++ Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) + #### A masked pattern was here #### + Vertices: + Map 1 +@@ -336,29 +337,12 @@ STAGE PLANS: + 1 Map 2 + Position of Big Table: 2 + Statistics: Num rows: 244 Data size: 43381 Basic 
stats: COMPLETE Column stats: NONE +- Map Join Operator +- condition map: +- Inner Join 0 to 1 +- Estimated key counts: Map 5 => 1 +- keys: +- 0 +- 1 +- input vertices: +- 1 Map 5 +- Position of Big Table: 0 +- Statistics: Num rows: 244 Data size: 45577 Basic stats: COMPLETE Column stats: NONE +- Group By Operator +- aggregations: count() +- mode: hash +- outputColumnNames: _col0 +- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +- Reduce Output Operator +- null sort order: +- sort order: +- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +- tag: -1 +- value expressions: _col0 (type: bigint) +- auto parallelism: false ++ Reduce Output Operator ++ null sort order: ++ sort order: ++ Statistics: Num rows: 244 Data size: 43381 Basic stats: COMPLETE Column stats: NONE ++ tag: 0 ++ auto parallelism: false + Execution mode: llap + LLAP IO: no inputs + Path -> Alias: +@@ -465,7 +449,7 @@ STAGE PLANS: + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [c] + /bucket_big/ds=2008-04-09 [c] +- Map 5 ++ Map 6 + Map Operator Tree: + TableScan + alias: d +@@ -539,6 +523,30 @@ STAGE PLANS: + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: ++ Merge Join Operator ++ condition map: ++ Inner Join 0 to 1 ++ keys: ++ 0 ++ 1 ++ Position of Big Table: 0 ++ Statistics: Num rows: 244 Data size: 45577 Basic stats: COMPLETE Column stats: NONE ++ Group By Operator ++ aggregations: count() ++ mode: hash ++ outputColumnNames: _col0 ++ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE ++ Reduce Output Operator ++ null sort order: ++ sort order: ++ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE ++ tag: -1 ++ value expressions: _col0 (type: bigint) ++ auto parallelism: false ++ Reducer 5 ++ Execution mode: llap ++ Needs Tagging: false ++ Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial +@@ -573,7 +581,7 @@ 
STAGE PLANS: + Processor Tree: + ListSink + +-Warning: Map Join MAPJOIN[34][bigTable=?] in task 'Map 3' is a cross product ++Warning: Shuffle Join MERGEJOIN[34][tables = [$hdt$_1, $hdt$_2, $hdt$_0, $hdt$_3]] in Stage 'Reducer 4' is a cross product + PREHOOK: query: select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key + PREHOOK: type: QUERY + PREHOOK: Input: default@bucket_big +diff --git a/ql/src/test/results/clientpositive/llap/cross_join.q.out b/ql/src/test/results/clientpositive/llap/cross_join.q.out +index 9d664af..6bde893 100644 +--- a/ql/src/test/results/clientpositive/llap/cross_join.q.out ++++ b/ql/src/test/results/clientpositive/llap/cross_join.q.out +@@ -12,7 +12,7 @@ STAGE PLANS: + Tez + #### A masked pattern was here #### + Edges: +- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE) ++ Reducer 2 <- Map 1 (XPROD_EDGE), Map 3 (XPROD_EDGE) + #### A masked pattern was here #### + Vertices: + Map 1 +@@ -81,7 +81,7 @@ STAGE PLANS: + Tez + #### A masked pattern was here #### + Edges: +- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE) ++ Reducer 2 <- Map 1 (XPROD_EDGE), Map 3 (XPROD_EDGE) + #### A masked pattern was here #### + Vertices: + Map 1 +@@ -215,7 +215,7 @@ STAGE PLANS: + Processor Tree: + ListSink + +-Warning: Map Join MAPJOIN[9][bigTable=?] 
in task 'Map 1' is a cross product ++Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product + PREHOOK: query: explain select src.key from src join src src2 + PREHOOK: type: QUERY + POSTHOOK: query: explain select src.key from src join src src2 +@@ -229,7 +229,7 @@ STAGE PLANS: + Tez + #### A masked pattern was here #### + Edges: +- Map 1 <- Map 2 (BROADCAST_EDGE) ++ Reducer 2 <- Map 1 (XPROD_EDGE), Map 3 (XPROD_EDGE) + #### A masked pattern was here #### + Vertices: + Map 1 +@@ -241,26 +241,13 @@ STAGE PLANS: + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE +- Map Join Operator +- condition map: +- Inner Join 0 to 1 +- keys: +- 0 +- 1 +- outputColumnNames: _col0 +- input vertices: +- 1 Map 2 +- Statistics: Num rows: 250000 Data size: 21750000 Basic stats: COMPLETE Column stats: COMPLETE +- File Output Operator +- compressed: false +- Statistics: Num rows: 250000 Data size: 21750000 Basic stats: COMPLETE Column stats: COMPLETE +- table: +- input format: org.apache.hadoop.mapred.SequenceFileInputFormat +- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat +- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe ++ Reduce Output Operator ++ sort order: ++ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE ++ value expressions: _col0 (type: string) + Execution mode: llap + LLAP IO: no inputs +- Map 2 ++ Map 3 + Map Operator Tree: + TableScan + alias: src2 +@@ -272,6 +259,24 @@ STAGE PLANS: + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs ++ Reducer 2 ++ Execution mode: llap ++ Reduce Operator Tree: ++ Merge Join Operator ++ condition map: ++ Inner Join 0 to 1 ++ keys: ++ 0 ++ 1 ++ outputColumnNames: _col0 ++ Statistics: Num rows: 250000 Data size: 21750000 Basic stats: 
COMPLETE Column stats: COMPLETE ++ File Output Operator ++ compressed: false ++ Statistics: Num rows: 250000 Data size: 21750000 Basic stats: COMPLETE Column stats: COMPLETE ++ table: ++ input format: org.apache.hadoop.mapred.SequenceFileInputFormat ++ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat ++ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator +@@ -279,7 +284,7 @@ STAGE PLANS: + Processor Tree: + ListSink + +-Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 1' is a cross product ++Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product + PREHOOK: query: explain select src.key from src cross join src src2 + PREHOOK: type: QUERY + POSTHOOK: query: explain select src.key from src cross join src src2 +@@ -293,7 +298,7 @@ STAGE PLANS: + Tez + #### A masked pattern was here #### + Edges: +- Map 1 <- Map 2 (BROADCAST_EDGE) ++ Reducer 2 <- Map 1 (XPROD_EDGE), Map 3 (XPROD_EDGE) + #### A masked pattern was here #### + Vertices: + Map 1 +@@ -305,26 +310,13 @@ STAGE PLANS: + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE +- Map Join Operator +- condition map: +- Inner Join 0 to 1 +- keys: +- 0 +- 1 +- outputColumnNames: _col0 +- input vertices: +- 1 Map 2 +- Statistics: Num rows: 250000 Data size: 21750000 Basic stats: COMPLETE Column stats: COMPLETE +- File Output Operator +- compressed: false +- Statistics: Num rows: 250000 Data size: 21750000 Basic stats: COMPLETE Column stats: COMPLETE +- table: +- input format: org.apache.hadoop.mapred.SequenceFileInputFormat +- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat +- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe ++ Reduce Output Operator ++ sort order: ++ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE ++ value 
expressions: _col0 (type: string) + Execution mode: llap + LLAP IO: no inputs +- Map 2 ++ Map 3 + Map Operator Tree: + TableScan + alias: src2 +@@ -336,6 +328,24 @@ STAGE PLANS: + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs ++ Reducer 2 ++ Execution mode: llap ++ Reduce Operator Tree: ++ Merge Join Operator ++ condition map: ++ Inner Join 0 to 1 ++ keys: ++ 0 ++ 1 ++ outputColumnNames: _col0 ++ Statistics: Num rows: 250000 Data size: 21750000 Basic stats: COMPLETE Column stats: COMPLETE ++ File Output Operator ++ compressed: false ++ Statistics: Num rows: 250000 Data size: 21750000 Basic stats: COMPLETE Column stats: COMPLETE ++ table: ++ input format: org.apache.hadoop.mapred.SequenceFileInputFormat ++ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat ++ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator +diff --git a/ql/src/test/results/clientpositive/llap/cross_prod_1.q.out b/ql/src/test/results/clientpositive/llap/cross_prod_1.q.out +new file mode 100644 +index 0000000..fd03fe5 +--- /dev/null ++++ b/ql/src/test/results/clientpositive/llap/cross_prod_1.q.out +@@ -0,0 +1,2502 @@ ++PREHOOK: query: create table X as ++select distinct * from src order by key limit 10 ++PREHOOK: type: CREATETABLE_AS_SELECT ++PREHOOK: Input: default@src ++PREHOOK: Output: database:default ++PREHOOK: Output: default@X ++POSTHOOK: query: create table X as ++select distinct * from src order by key limit 10 ++POSTHOOK: type: CREATETABLE_AS_SELECT ++POSTHOOK: Input: default@src ++POSTHOOK: Output: database:default ++POSTHOOK: Output: default@X ++POSTHOOK: Lineage: x.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] ++POSTHOOK: Lineage: x.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] ++Warning: Shuffle Join MERGEJOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross 
product ++PREHOOK: query: explain select * from X as A, X as B order by A.key, B.key ++PREHOOK: type: QUERY ++POSTHOOK: query: explain select * from X as A, X as B order by A.key, B.key ++POSTHOOK: type: QUERY ++STAGE DEPENDENCIES: ++ Stage-1 is a root stage ++ Stage-0 depends on stages: Stage-1 ++ ++STAGE PLANS: ++ Stage: Stage-1 ++ Tez ++#### A masked pattern was here #### ++ Edges: ++ Reducer 2 <- Map 1 (XPROD_EDGE), Map 4 (XPROD_EDGE) ++ Reducer 3 <- Reducer 2 (SIMPLE_EDGE) ++#### A masked pattern was here #### ++ Vertices: ++ Map 1 ++ Map Operator Tree: ++ TableScan ++ alias: a ++ Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE ++ Select Operator ++ expressions: key (type: string), value (type: string) ++ outputColumnNames: _col0, _col1 ++ Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE ++ Reduce Output Operator ++ sort order: ++ Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE ++ value expressions: _col0 (type: string), _col1 (type: string) ++ Execution mode: llap ++ LLAP IO: no inputs ++ Map 4 ++ Map Operator Tree: ++ TableScan ++ alias: b ++ Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE ++ Select Operator ++ expressions: key (type: string), value (type: string) ++ outputColumnNames: _col0, _col1 ++ Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE ++ Reduce Output Operator ++ sort order: ++ Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE ++ value expressions: _col0 (type: string), _col1 (type: string) ++ Execution mode: llap ++ LLAP IO: no inputs ++ Reducer 2 ++ Execution mode: llap ++ Reduce Operator Tree: ++ Merge Join Operator ++ condition map: ++ Inner Join 0 to 1 ++ keys: ++ 0 ++ 1 ++ outputColumnNames: _col0, _col1, _col2, _col3 ++ Statistics: Num rows: 100 Data size: 73700 Basic stats: COMPLETE Column stats: NONE ++ Reduce Output Operator ++ key 
expressions: _col0 (type: string), _col2 (type: string) ++ sort order: ++ ++ Statistics: Num rows: 100 Data size: 73700 Basic stats: COMPLETE Column stats: NONE ++ value expressions: _col1 (type: string), _col3 (type: string) ++ Reducer 3 ++ Execution mode: llap ++ Reduce Operator Tree: ++ Select Operator ++ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col1 (type: string) ++ outputColumnNames: _col0, _col1, _col2, _col3 ++ Statistics: Num rows: 100 Data size: 73700 Basic stats: COMPLETE Column stats: NONE ++ File Output Operator ++ compressed: false ++ Statistics: Num rows: 100 Data size: 73700 Basic stats: COMPLETE Column stats: NONE ++ table: ++ input format: org.apache.hadoop.mapred.SequenceFileInputFormat ++ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat ++ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe ++ ++ Stage: Stage-0 ++ Fetch Operator ++ limit: -1 ++ Processor Tree: ++ ListSink ++ ++Warning: Shuffle Join MERGEJOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product ++PREHOOK: query: select * from X as A, X as B order by A.key, B.key ++PREHOOK: type: QUERY ++PREHOOK: Input: default@x ++#### A masked pattern was here #### ++POSTHOOK: query: select * from X as A, X as B order by A.key, B.key ++POSTHOOK: type: QUERY ++POSTHOOK: Input: default@x ++#### A masked pattern was here #### ++0 val_0 0 val_0 ++0 val_0 10 val_10 ++0 val_0 100 val_100 ++0 val_0 103 val_103 ++0 val_0 104 val_104 ++0 val_0 105 val_105 ++0 val_0 11 val_11 ++0 val_0 111 val_111 ++0 val_0 113 val_113 ++0 val_0 114 val_114 ++10 val_10 0 val_0 ++10 val_10 10 val_10 ++10 val_10 100 val_100 ++10 val_10 103 val_103 ++10 val_10 104 val_104 ++10 val_10 105 val_105 ++10 val_10 11 val_11 ++10 val_10 111 val_111 ++10 val_10 113 val_113 ++10 val_10 114 val_114 ++100 val_100 0 val_0 ++100 val_100 10 val_10 ++100 val_100 100 val_100 ++100 val_100 103 val_103 ++100 val_100 
104 val_104 ++100 val_100 105 val_105 ++100 val_100 11 val_11 ++100 val_100 111 val_111 ++100 val_100 113 val_113 ++100 val_100 114 val_114 ++103 val_103 0 val_0 ++103 val_103 10 val_10 ++103 val_103 100 val_100 ++103 val_103 103 val_103 ++103 val_103 104 val_104 ++103 val_103 105 val_105 ++103 val_103 11 val_11 ++103 val_103 111 val_111 ++103 val_103 113 val_113 ++103 val_103 114 val_114 ++104 val_104 0 val_0 ++104 val_104 10 val_10 ++104 val_104 100 val_100 ++104 val_104 103 val_103 ++104 val_104 104 val_104 ++104 val_104 105 val_105 ++104 val_104 11 val_11 ++104 val_104 111 val_111 ++104 val_104 113 val_113 ++104 val_104 114 val_114 ++105 val_105 0 val_0 ++105 val_105 10 val_10 ++105 val_105 100 val_100 ++105 val_105 103 val_103 ++105 val_105 104 val_104 ++105 val_105 105 val_105 ++105 val_105 11 val_11 ++105 val_105 111 val_111 ++105 val_105 113 val_113 ++105 val_105 114 val_114 ++11 val_11 0 val_0 ++11 val_11 10 val_10 ++11 val_11 100 val_100 ++11 val_11 103 val_103 ++11 val_11 104 val_104 ++11 val_11 105 val_105 ++11 val_11 11 val_11 ++11 val_11 111 val_111 ++11 val_11 113 val_113 ++11 val_11 114 val_114 ++111 val_111 0 val_0 ++111 val_111 10 val_10 ++111 val_111 100 val_100 ++111 val_111 103 val_103 ++111 val_111 104 val_104 ++111 val_111 105 val_105 ++111 val_111 11 val_11 ++111 val_111 111 val_111 ++111 val_111 113 val_113 ++111 val_111 114 val_114 ++113 val_113 0 val_0 ++113 val_113 10 val_10 ++113 val_113 100 val_100 ++113 val_113 103 val_103 ++113 val_113 104 val_104 ++113 val_113 105 val_105 ++113 val_113 11 val_11 ++113 val_113 111 val_111 ++113 val_113 113 val_113 ++113 val_113 114 val_114 ++114 val_114 0 val_0 ++114 val_114 10 val_10 ++114 val_114 100 val_100 ++114 val_114 103 val_103 ++114 val_114 104 val_104 ++114 val_114 105 val_105 ++114 val_114 11 val_11 ++114 val_114 111 val_111 ++114 val_114 113 val_113 ++114 val_114 114 val_114 ++Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product 
++PREHOOK: query: explain select * from X as A join X as B on A.key part.p_name) ++PREHOOK: type: QUERY ++POSTHOOK: query: explain select * from part where p_size IN (select max(p_size) from part p where p.p_type <> part.p_name) ++POSTHOOK: type: QUERY ++STAGE DEPENDENCIES: ++ Stage-1 is a root stage ++ Stage-0 depends on stages: Stage-1 ++ ++STAGE PLANS: ++ Stage: Stage-1 ++ Tez ++#### A masked pattern was here #### ++ Edges: ++ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) ++ Reducer 4 <- Map 3 (XPROD_EDGE), Reducer 7 (XPROD_EDGE) ++ Reducer 5 <- Reducer 4 (SIMPLE_EDGE) ++ Reducer 7 <- Map 6 (SIMPLE_EDGE) ++#### A masked pattern was here #### ++ Vertices: ++ Map 1 ++ Map Operator Tree: ++ TableScan ++ alias: part ++ Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE ++ Select Operator ++ expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) ++ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 ++ Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE ++ Reduce Output Operator ++ key expressions: _col1 (type: string), _col5 (type: int) ++ sort order: ++ ++ Map-reduce partition columns: _col1 (type: string), _col5 (type: int) ++ Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE ++ value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string) ++ Execution mode: llap ++ LLAP IO: no inputs ++ Map 3 ++ Map Operator Tree: ++ TableScan ++ alias: p ++ Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE ++ Select Operator ++ expressions: p_type (type: string), p_size (type: int) ++ outputColumnNames: _col0, _col1 ++ 
Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE ++ Reduce Output Operator ++ sort order: ++ Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE ++ value expressions: _col0 (type: string), _col1 (type: int) ++ Execution mode: llap ++ LLAP IO: no inputs ++ Map 6 ++ Map Operator Tree: ++ TableScan ++ alias: part ++ Statistics: Num rows: 26 Data size: 3146 Basic stats: COMPLETE Column stats: COMPLETE ++ Group By Operator ++ keys: p_name (type: string) ++ mode: hash ++ outputColumnNames: _col0 ++ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE ++ Reduce Output Operator ++ key expressions: _col0 (type: string) ++ sort order: + ++ Map-reduce partition columns: _col0 (type: string) ++ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE ++ Execution mode: llap ++ LLAP IO: no inputs ++ Reducer 2 ++ Execution mode: llap ++ Reduce Operator Tree: ++ Merge Join Operator ++ condition map: ++ Inner Join 0 to 1 ++ keys: ++ 0 _col1 (type: string), _col5 (type: int) ++ 1 _col1 (type: string), _col0 (type: int) ++ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 ++ Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE ++ File Output Operator ++ compressed: false ++ Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE ++ table: ++ input format: org.apache.hadoop.mapred.SequenceFileInputFormat ++ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat ++ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe ++ Reducer 4 ++ Execution mode: llap ++ Reduce Operator Tree: ++ Merge Join Operator ++ condition map: ++ Inner Join 0 to 1 ++ keys: ++ 0 ++ 1 ++ outputColumnNames: _col0, _col1, _col2 ++ residual filter predicates: {(_col0 <> _col2)} ++ Statistics: Num rows: 338 Data size: 77402 Basic stats: COMPLETE Column stats: COMPLETE 
++ Select Operator ++ expressions: _col1 (type: int), _col2 (type: string) ++ outputColumnNames: _col1, _col2 ++ Statistics: Num rows: 338 Data size: 77402 Basic stats: COMPLETE Column stats: COMPLETE ++ Group By Operator ++ aggregations: max(_col1) ++ keys: _col2 (type: string) ++ mode: hash ++ outputColumnNames: _col0, _col1 ++ Statistics: Num rows: 25 Data size: 3125 Basic stats: COMPLETE Column stats: COMPLETE ++ Reduce Output Operator ++ key expressions: _col0 (type: string) ++ sort order: + ++ Map-reduce partition columns: _col0 (type: string) ++ Statistics: Num rows: 25 Data size: 3125 Basic stats: COMPLETE Column stats: COMPLETE ++ value expressions: _col1 (type: int) ++ Reducer 5 ++ Execution mode: llap ++ Reduce Operator Tree: ++ Group By Operator ++ aggregations: max(VALUE._col0) ++ keys: KEY._col0 (type: string) ++ mode: mergepartial ++ outputColumnNames: _col0, _col1 ++ Statistics: Num rows: 25 Data size: 3125 Basic stats: COMPLETE Column stats: COMPLETE ++ Select Operator ++ expressions: _col1 (type: int), _col0 (type: string) ++ outputColumnNames: _col0, _col1 ++ Statistics: Num rows: 25 Data size: 3125 Basic stats: COMPLETE Column stats: COMPLETE ++ Reduce Output Operator ++ key expressions: _col1 (type: string), _col0 (type: int) ++ sort order: ++ ++ Map-reduce partition columns: _col1 (type: string), _col0 (type: int) ++ Statistics: Num rows: 25 Data size: 3125 Basic stats: COMPLETE Column stats: COMPLETE ++ Reducer 7 ++ Execution mode: llap ++ Reduce Operator Tree: ++ Group By Operator ++ keys: KEY._col0 (type: string) ++ mode: mergepartial ++ outputColumnNames: _col0 ++ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE ++ Reduce Output Operator ++ sort order: ++ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE ++ value expressions: _col0 (type: string) ++ ++ Stage: Stage-0 ++ Fetch Operator ++ limit: -1 ++ Processor Tree: ++ ListSink ++ ++Warning: Shuffle Join 
MERGEJOIN[29][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product ++PREHOOK: query: select * from part where p_size IN (select max(p_size) from part p where p.p_type <> part.p_name) ++PREHOOK: type: QUERY ++PREHOOK: Input: default@part ++#### A masked pattern was here #### ++POSTHOOK: query: select * from part where p_size IN (select max(p_size) from part p where p.p_type <> part.p_name) ++POSTHOOK: type: QUERY ++POSTHOOK: Input: default@part ++#### A masked pattern was here #### ++15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu ++Warning: Shuffle Join MERGEJOIN[41][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product ++PREHOOK: query: explain select * from part where p_size IN (select pp.p_size from part p join part pp on pp.p_type = p.p_type where part.p_type <> p.p_name) ++PREHOOK: type: QUERY ++POSTHOOK: query: explain select * from part where p_size IN (select pp.p_size from part p join part pp on pp.p_type = p.p_type where part.p_type <> p.p_name) ++POSTHOOK: type: QUERY ++STAGE DEPENDENCIES: ++ Stage-1 is a root stage ++ Stage-0 depends on stages: Stage-1 ++ ++STAGE PLANS: ++ Stage: Stage-1 ++ Tez ++#### A masked pattern was here #### ++ Edges: ++ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) ++ Reducer 4 <- Map 3 (XPROD_EDGE), Reducer 8 (XPROD_EDGE) ++ Reducer 5 <- Map 9 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) ++ Reducer 6 <- Reducer 5 (SIMPLE_EDGE) ++ Reducer 8 <- Map 7 (SIMPLE_EDGE) ++#### A masked pattern was here #### ++ Vertices: ++ Map 1 ++ Map Operator Tree: ++ TableScan ++ alias: part ++ Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE ++ Select Operator ++ expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: 
string) ++ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 ++ Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE ++ Reduce Output Operator ++ key expressions: _col4 (type: string), _col5 (type: int) ++ sort order: ++ ++ Map-reduce partition columns: _col4 (type: string), _col5 (type: int) ++ Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE ++ value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string) ++ Execution mode: llap ++ LLAP IO: no inputs ++ Map 3 ++ Map Operator Tree: ++ TableScan ++ alias: p ++ Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE ++ Filter Operator ++ predicate: p_type is not null (type: boolean) ++ Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE ++ Select Operator ++ expressions: p_name (type: string), p_type (type: string) ++ outputColumnNames: _col0, _col1 ++ Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE ++ Reduce Output Operator ++ sort order: ++ Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE ++ value expressions: _col0 (type: string), _col1 (type: string) ++ Execution mode: llap ++ LLAP IO: no inputs ++ Map 7 ++ Map Operator Tree: ++ TableScan ++ alias: part ++ Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE ++ Group By Operator ++ keys: p_type (type: string) ++ mode: hash ++ outputColumnNames: _col0 ++ Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE ++ Reduce Output Operator ++ key expressions: _col0 (type: string) ++ sort order: + ++ Map-reduce partition columns: _col0 (type: string) ++ Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE ++ Execution mode: llap ++ 
LLAP IO: no inputs ++ Map 9 ++ Map Operator Tree: ++ TableScan ++ alias: pp ++ Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE ++ Filter Operator ++ predicate: p_type is not null (type: boolean) ++ Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE ++ Select Operator ++ expressions: p_type (type: string), p_size (type: int) ++ outputColumnNames: _col0, _col1 ++ Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE ++ Reduce Output Operator ++ key expressions: _col0 (type: string) ++ sort order: + ++ Map-reduce partition columns: _col0 (type: string) ++ Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE ++ value expressions: _col1 (type: int) ++ Execution mode: llap ++ LLAP IO: no inputs ++ Reducer 2 ++ Execution mode: llap ++ Reduce Operator Tree: ++ Merge Join Operator ++ condition map: ++ Inner Join 0 to 1 ++ keys: ++ 0 _col4 (type: string), _col5 (type: int) ++ 1 _col1 (type: string), _col0 (type: int) ++ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 ++ Statistics: Num rows: 198 Data size: 122562 Basic stats: COMPLETE Column stats: COMPLETE ++ File Output Operator ++ compressed: false ++ Statistics: Num rows: 198 Data size: 122562 Basic stats: COMPLETE Column stats: COMPLETE ++ table: ++ input format: org.apache.hadoop.mapred.SequenceFileInputFormat ++ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat ++ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe ++ Reducer 4 ++ Execution mode: llap ++ Reduce Operator Tree: ++ Merge Join Operator ++ condition map: ++ Inner Join 0 to 1 ++ keys: ++ 0 ++ 1 ++ outputColumnNames: _col0, _col1, _col2 ++ residual filter predicates: {(_col2 <> _col0)} ++ Statistics: Num rows: 338 Data size: 111202 Basic stats: COMPLETE Column stats: COMPLETE ++ Reduce Output Operator ++ key expressions: _col1 (type: string) ++ sort order: + ++ 
Map-reduce partition columns: _col1 (type: string) ++ Statistics: Num rows: 338 Data size: 111202 Basic stats: COMPLETE Column stats: COMPLETE ++ value expressions: _col2 (type: string) ++ Reducer 5 ++ Execution mode: llap ++ Reduce Operator Tree: ++ Merge Join Operator ++ condition map: ++ Inner Join 0 to 1 ++ keys: ++ 0 _col1 (type: string) ++ 1 _col0 (type: string) ++ outputColumnNames: _col2, _col4 ++ Statistics: Num rows: 366 Data size: 39528 Basic stats: COMPLETE Column stats: COMPLETE ++ Group By Operator ++ keys: _col2 (type: string), _col4 (type: int) ++ mode: hash ++ outputColumnNames: _col0, _col1 ++ Statistics: Num rows: 183 Data size: 19764 Basic stats: COMPLETE Column stats: COMPLETE ++ Reduce Output Operator ++ key expressions: _col0 (type: string), _col1 (type: int) ++ sort order: ++ ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) ++ Statistics: Num rows: 183 Data size: 19764 Basic stats: COMPLETE Column stats: COMPLETE ++ Reducer 6 ++ Execution mode: llap ++ Reduce Operator Tree: ++ Group By Operator ++ keys: KEY._col0 (type: string), KEY._col1 (type: int) ++ mode: mergepartial ++ outputColumnNames: _col0, _col1 ++ Statistics: Num rows: 183 Data size: 19764 Basic stats: COMPLETE Column stats: COMPLETE ++ Select Operator ++ expressions: _col1 (type: int), _col0 (type: string) ++ outputColumnNames: _col0, _col1 ++ Statistics: Num rows: 183 Data size: 19764 Basic stats: COMPLETE Column stats: COMPLETE ++ Reduce Output Operator ++ key expressions: _col1 (type: string), _col0 (type: int) ++ sort order: ++ ++ Map-reduce partition columns: _col1 (type: string), _col0 (type: int) ++ Statistics: Num rows: 183 Data size: 19764 Basic stats: COMPLETE Column stats: COMPLETE ++ Reducer 8 ++ Execution mode: llap ++ Reduce Operator Tree: ++ Group By Operator ++ keys: KEY._col0 (type: string) ++ mode: mergepartial ++ outputColumnNames: _col0 ++ Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE ++ Reduce 
Output Operator ++ sort order: ++ Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE ++ value expressions: _col0 (type: string) ++ ++ Stage: Stage-0 ++ Fetch Operator ++ limit: -1 ++ Processor Tree: ++ ListSink ++ ++Warning: Shuffle Join MERGEJOIN[41][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product ++PREHOOK: query: select * from part where p_size IN (select pp.p_size from part p join part pp on pp.p_type = p.p_type where part.p_type <> p.p_name) ++PREHOOK: type: QUERY ++PREHOOK: Input: default@part ++#### A masked pattern was here #### ++POSTHOOK: query: select * from part where p_size IN (select pp.p_size from part p join part pp on pp.p_type = p.p_type where part.p_type <> p.p_name) ++POSTHOOK: type: QUERY ++POSTHOOK: Input: default@part ++#### A masked pattern was here #### ++105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ ++110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously ++112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car ++121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h ++121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h ++132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even ++144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about ++146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref ++15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu 
++155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra ++17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the ++17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve ++191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle ++192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir ++195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de ++33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful ++40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s ++42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl ++45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful ++48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i ++49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick ++65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr ++78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith ++85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull ++86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully ++90681 almond antique chartreuse khaki white Manufacturer#3 
Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl +diff --git a/ql/src/test/results/clientpositive/llap/subquery_multi.q.out b/ql/src/test/results/clientpositive/llap/subquery_multi.q.out +index a98a011..96fe17a 100644 +--- a/ql/src/test/results/clientpositive/llap/subquery_multi.q.out ++++ b/ql/src/test/results/clientpositive/llap/subquery_multi.q.out +@@ -262,7 +262,7 @@ STAGE PLANS: + #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) +- Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) ++ Reducer 3 <- Reducer 2 (XPROD_EDGE), Reducer 7 (XPROD_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 8 (ONE_TO_ONE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) + Reducer 7 <- Map 5 (CUSTOM_SIMPLE_EDGE) +@@ -463,7 +463,7 @@ STAGE PLANS: + #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) +- Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE), Reducer 8 (CUSTOM_SIMPLE_EDGE) ++ Reducer 3 <- Reducer 2 (XPROD_EDGE), Reducer 8 (XPROD_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 9 (ONE_TO_ONE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) + Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) +@@ -647,41 +647,41 @@ STAGE PLANS: + Processor Tree: + ListSink + +-Warning: Shuffle Join MERGEJOIN[41][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +-PREHOOK: query: select * from part_null where p_name IN (select p_name from part_null) AND p_brand NOT IN (select p_type from part_null) ++Warning: Shuffle Join MERGEJOIN[43][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product ++PREHOOK: query: select * from part_null where p_name IN (select p_name from part_null) AND p_brand NOT IN (select p_type from part_null) order by part_null.p_partkey + PREHOOK: type: QUERY + PREHOOK: Input: default@part_null + #### A masked pattern was here #### +-POSTHOOK: query: select * from part_null where p_name 
IN (select p_name from part_null) AND p_brand NOT IN (select p_type from part_null) ++POSTHOOK: query: select * from part_null where p_name IN (select p_name from part_null) AND p_brand NOT IN (select p_type from part_null) order by part_null.p_partkey + POSTHOOK: type: QUERY + POSTHOOK: Input: default@part_null + #### A masked pattern was here #### ++15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu ++17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the ++17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve ++33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful ++40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s ++42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl ++45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful ++48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i ++49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick + 65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr ++78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith + 85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull + 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully ++90681 almond antique chartreuse khaki white 
Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl ++105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ ++110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously ++112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car + 121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h + 121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +-110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously +-191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle +-105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ +-146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref + 132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even +-195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de +-90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl +-40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s +-112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car + 144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about +-17273 almond 
antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the +-33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful +-49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick +-17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve +-48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i +-45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful +-42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl +-192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir +-78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith ++146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref + 155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. 
bra +-15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu ++191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle ++192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir ++195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de + Warning: Shuffle Join MERGEJOIN[42][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product + PREHOOK: query: explain select * from part_null where p_brand IN (select p_brand from part_null) AND p_brand NOT IN (select p_name from part_null) + PREHOOK: type: QUERY +@@ -697,7 +697,7 @@ STAGE PLANS: + #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) +- Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE), Reducer 8 (CUSTOM_SIMPLE_EDGE) ++ Reducer 3 <- Reducer 2 (XPROD_EDGE), Reducer 8 (XPROD_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 9 (ONE_TO_ONE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) + Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) +@@ -907,7 +907,7 @@ STAGE PLANS: + Tez + #### A masked pattern was here #### + Edges: +- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) ++ Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 6 (XPROD_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 4 <- Map 8 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) +@@ -1135,7 +1135,7 @@ STAGE PLANS: + #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) +- Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) ++ Reducer 3 <- Reducer 2 (XPROD_EDGE), Reducer 7 (XPROD_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) + #### A masked pattern was here #### 
+@@ -1265,43 +1265,43 @@ STAGE PLANS: + Processor Tree: + ListSink + +-Warning: Shuffle Join MERGEJOIN[30][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +-PREHOOK: query: select * from part_null where p_name IN (select p_name from part_null) AND EXISTS (select c from tnull) ++Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product ++PREHOOK: query: select * from part_null where p_name IN (select p_name from part_null) AND EXISTS (select c from tnull) order by part_null.p_partkey + PREHOOK: type: QUERY + PREHOOK: Input: default@part_null + PREHOOK: Input: default@tnull + #### A masked pattern was here #### +-POSTHOOK: query: select * from part_null where p_name IN (select p_name from part_null) AND EXISTS (select c from tnull) ++POSTHOOK: query: select * from part_null where p_name IN (select p_name from part_null) AND EXISTS (select c from tnull) order by part_null.p_partkey + POSTHOOK: type: QUERY + POSTHOOK: Input: default@part_null + POSTHOOK: Input: default@tnull + #### A masked pattern was here #### +-192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir +-121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +-121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +-90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl +-85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull ++15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu + 17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the +-49671 
almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick ++17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve ++33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful ++40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s + 42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl ++45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful ++48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i ++49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick ++65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr ++78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith ++85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull ++86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully ++90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl ++105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ ++110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously + 112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car +-40982 almond 
antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s ++121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h ++121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h ++132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even + 144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about +-110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously ++146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref + 155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra +-105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ +-48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i + 191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle +-86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully +-15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu +-45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful +-146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref +-65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the 
expr +-132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even ++192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir + 195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de +-17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve +-33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful +-78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith + Warning: Shuffle Join MERGEJOIN[30][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product + PREHOOK: query: explain select * from part_null where p_size IN (select p_size from part_null) AND EXISTS (select c from tempty) + PREHOOK: type: QUERY +@@ -1317,7 +1317,7 @@ STAGE PLANS: + #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) +- Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) ++ Reducer 3 <- Reducer 2 (XPROD_EDGE), Reducer 7 (XPROD_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) + #### A masked pattern was here #### +@@ -2903,7 +2903,7 @@ STAGE PLANS: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +- Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE), Reducer 10 (CUSTOM_SIMPLE_EDGE) ++ Reducer 7 <- Map 6 (XPROD_EDGE), Reducer 10 (XPROD_EDGE) + Reducer 8 <- Reducer 11 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + #### A masked pattern was here #### + Vertices: +@@ -3701,7 +3701,7 @@ STAGE PLANS: + #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +- Reducer 4 <- Map 3 
(CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) ++ Reducer 4 <- Map 3 (XPROD_EDGE), Reducer 6 (XPROD_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) + #### A masked pattern was here #### + Vertices: +@@ -3872,7 +3872,7 @@ STAGE PLANS: + Tez + #### A masked pattern was here #### + Edges: +- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) ++ Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 5 (XPROD_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) + Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 4 (SIMPLE_EDGE) +@@ -4035,7 +4035,7 @@ STAGE PLANS: + #### A masked pattern was here #### + Edges: + Reducer 10 <- Map 9 (SIMPLE_EDGE) +- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) ++ Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 7 (XPROD_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 10 (CUSTOM_SIMPLE_EDGE), Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) +diff --git a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out +index d8f279e..5cc2809 100644 +--- a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out ++++ b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out +@@ -24,7 +24,7 @@ STAGE PLANS: + Tez + #### A masked pattern was here #### + Edges: +- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) ++ Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 5 (XPROD_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) + Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 4 (SIMPLE_EDGE) +@@ -605,7 +605,7 @@ STAGE PLANS: + Tez + #### A masked pattern was here #### + Edges: +- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) ++ Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 6 (XPROD_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) 
+ Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) +@@ -1477,7 +1477,7 @@ STAGE PLANS: + Tez + #### A masked pattern was here #### + Edges: +- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) ++ Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 4 (XPROD_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) + Reducer 4 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 1 (SIMPLE_EDGE) +@@ -1840,7 +1840,7 @@ STAGE PLANS: + Tez + #### A masked pattern was here #### + Edges: +- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) ++ Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 6 (XPROD_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (ONE_TO_ONE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) +@@ -2030,8 +2030,8 @@ POSTHOOK: Input: default@part + 42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl + 49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick + 33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful +-105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ + 17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the ++105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ + 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl + 78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith + 132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even +@@ -2055,7 +2055,7 @@ STAGE 
PLANS: + Tez + #### A masked pattern was here #### + Edges: +- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) ++ Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 6 (XPROD_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (ONE_TO_ONE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) +@@ -2504,7 +2504,7 @@ STAGE PLANS: + Tez + #### A masked pattern was here #### + Edges: +- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) ++ Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 6 (XPROD_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (ONE_TO_ONE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) +@@ -3588,7 +3588,7 @@ STAGE PLANS: + Tez + #### A masked pattern was here #### + Edges: +- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) ++ Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 7 (XPROD_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +@@ -4161,7 +4161,7 @@ STAGE PLANS: + Tez + #### A masked pattern was here #### + Edges: +- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) ++ Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 7 (XPROD_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) +@@ -4389,7 +4389,7 @@ STAGE PLANS: + Tez + #### A masked pattern was here #### + Edges: +- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) ++ Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 7 (XPROD_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) +@@ -4614,7 +4614,7 @@ STAGE PLANS: + Map 7 <- Union 5 (CONTAINS) + Map 8 <- Union 9 (CONTAINS) + Reducer 10 <- Union 9 (SIMPLE_EDGE) +- Reducer 2 <- Map 1 
(CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) ++ Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 6 (XPROD_EDGE) + Reducer 3 <- Reducer 10 (ONE_TO_ONE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 6 <- Union 5 (CUSTOM_SIMPLE_EDGE) + #### A masked pattern was here #### +@@ -5580,7 +5580,7 @@ STAGE PLANS: + Tez + #### A masked pattern was here #### + Edges: +- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) ++ Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 5 (XPROD_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) + Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 4 (SIMPLE_EDGE) +@@ -6826,7 +6826,7 @@ STAGE PLANS: + Tez + #### A masked pattern was here #### + Edges: +- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) ++ Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 5 (XPROD_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) + Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 4 (SIMPLE_EDGE) +@@ -6981,7 +6981,7 @@ STAGE PLANS: + Tez + #### A masked pattern was here #### + Edges: +- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) ++ Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 4 (XPROD_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) + Reducer 4 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 1 (SIMPLE_EDGE) +@@ -7147,9 +7147,9 @@ STAGE PLANS: + Reducer 10 <- Map 8 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (ONE_TO_ONE_EDGE) +- Reducer 4 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 9 (CUSTOM_SIMPLE_EDGE) ++ Reducer 4 <- Map 1 (XPROD_EDGE), Reducer 9 (XPROD_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +- Reducer 6 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 10 (CUSTOM_SIMPLE_EDGE) ++ Reducer 6 <- Map 1 (XPROD_EDGE), Reducer 10 (XPROD_EDGE) + Reducer 7 <- Reducer 6 (SIMPLE_EDGE) + Reducer 9 <- Map 8 (SIMPLE_EDGE) + #### A masked pattern was here #### +diff 
--git a/ql/src/test/results/clientpositive/llap/subquery_null_agg.q.out b/ql/src/test/results/clientpositive/llap/subquery_null_agg.q.out +index 33ebbba..0a9aa6c 100644 +--- a/ql/src/test/results/clientpositive/llap/subquery_null_agg.q.out ++++ b/ql/src/test/results/clientpositive/llap/subquery_null_agg.q.out +@@ -55,7 +55,7 @@ STAGE PLANS: + Tez + #### A masked pattern was here #### + Edges: +- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) ++ Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 4 (XPROD_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 1 (SIMPLE_EDGE) +diff --git a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out +index a738980..c89d053 100644 +--- a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out ++++ b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out +@@ -89,7 +89,7 @@ STAGE PLANS: + Tez + #### A masked pattern was here #### + Edges: +- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) ++ Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 4 (XPROD_EDGE) + Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) + #### A masked pattern was here #### + Vertices: +@@ -217,7 +217,7 @@ STAGE PLANS: + Tez + #### A masked pattern was here #### + Edges: +- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 5 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) ++ Reducer 2 <- Map 1 (XPROD_EDGE), Map 5 (XPROD_EDGE), Reducer 4 (XPROD_EDGE) + Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) + #### A masked pattern was here #### + Vertices: +@@ -331,7 +331,7 @@ STAGE PLANS: + Tez + #### A masked pattern was here #### + Edges: +- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) ++ Reducer 2 <- Map 1 (XPROD_EDGE), Map 3 (XPROD_EDGE), Reducer 4 (XPROD_EDGE) + Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) + #### A masked 
pattern was here #### + Vertices: +@@ -450,7 +450,7 @@ STAGE PLANS: + Tez + #### A masked pattern was here #### + Edges: +- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) ++ Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 6 (XPROD_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE), Reducer 8 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) +@@ -650,7 +650,7 @@ STAGE PLANS: + Tez + #### A masked pattern was here #### + Edges: +- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) ++ Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 4 (XPROD_EDGE) + Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) + #### A masked pattern was here #### + Vertices: +@@ -946,7 +946,7 @@ STAGE PLANS: + Tez + #### A masked pattern was here #### + Edges: +- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE), Reducer 8 (CUSTOM_SIMPLE_EDGE) ++ Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 6 (XPROD_EDGE), Reducer 8 (XPROD_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) +@@ -1188,7 +1188,7 @@ STAGE PLANS: + Tez + #### A masked pattern was here #### + Edges: +- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) ++ Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 4 (XPROD_EDGE) + Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) + #### A masked pattern was here #### + Vertices: +@@ -1886,7 +1886,7 @@ STAGE PLANS: + #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) +- Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) ++ Reducer 3 <- Reducer 2 (XPROD_EDGE), Reducer 7 (XPROD_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) + #### A masked pattern was here #### +@@ -2082,7 +2082,7 @@ STAGE PLANS: + Tez + #### A masked pattern was here #### + Edges: +- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 5 
(CUSTOM_SIMPLE_EDGE) ++ Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 5 (XPROD_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (ONE_TO_ONE_EDGE) + Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) +@@ -2404,9 +2404,9 @@ STAGE PLANS: + #### A masked pattern was here #### + Edges: + Reducer 10 <- Map 7 (SIMPLE_EDGE) +- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) ++ Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 6 (XPROD_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +- Reducer 4 <- Reducer 10 (CUSTOM_SIMPLE_EDGE), Reducer 3 (CUSTOM_SIMPLE_EDGE), Reducer 9 (CUSTOM_SIMPLE_EDGE) ++ Reducer 4 <- Reducer 10 (XPROD_EDGE), Reducer 3 (XPROD_EDGE), Reducer 9 (XPROD_EDGE) + Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) + Reducer 8 <- Map 7 (SIMPLE_EDGE) + Reducer 9 <- Reducer 8 (CUSTOM_SIMPLE_EDGE) +@@ -2653,7 +2653,7 @@ STAGE PLANS: + #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +- Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) ++ Reducer 3 <- Reducer 2 (XPROD_EDGE), Reducer 5 (XPROD_EDGE) + Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) + #### A masked pattern was here #### + Vertices: +@@ -2790,7 +2790,7 @@ STAGE PLANS: + Reducer 15 <- Reducer 14 (SIMPLE_EDGE), Union 6 (CONTAINS) + Reducer 16 <- Map 13 (CUSTOM_SIMPLE_EDGE) + Reducer 17 <- Reducer 16 (SIMPLE_EDGE), Union 11 (CONTAINS) +- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 12 (CUSTOM_SIMPLE_EDGE), Reducer 8 (CUSTOM_SIMPLE_EDGE) ++ Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 12 (XPROD_EDGE), Reducer 8 (XPROD_EDGE) + Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE), Union 6 (CONTAINS) + Reducer 7 <- Union 6 (SIMPLE_EDGE) +@@ -3335,7 +3335,7 @@ STAGE PLANS: + #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) +- Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE), Reducer 8 (CUSTOM_SIMPLE_EDGE) ++ Reducer 3 <- Reducer 2 
(XPROD_EDGE), Reducer 8 (XPROD_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 9 (ONE_TO_ONE_EDGE) + Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) + Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) +@@ -3571,7 +3571,7 @@ STAGE PLANS: + Tez + #### A masked pattern was here #### + Edges: +- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) ++ Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 6 (XPROD_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 9 (ONE_TO_ONE_EDGE) + Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) +@@ -5095,7 +5095,7 @@ STAGE PLANS: + #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) +- Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) ++ Reducer 3 <- Reducer 2 (XPROD_EDGE), Reducer 7 (XPROD_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) + #### A masked pattern was here #### +@@ -5295,7 +5295,7 @@ STAGE PLANS: + #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +- Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) ++ Reducer 3 <- Reducer 2 (XPROD_EDGE), Reducer 4 (XPROD_EDGE) + Reducer 4 <- Map 1 (CUSTOM_SIMPLE_EDGE) + #### A masked pattern was here #### + Vertices: +@@ -5442,7 +5442,7 @@ STAGE PLANS: + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +- Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) ++ Reducer 4 <- Reducer 3 (XPROD_EDGE), Reducer 6 (XPROD_EDGE) + Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) + #### A masked pattern was here #### + Vertices: +@@ -5613,7 +5613,7 @@ STAGE PLANS: + Tez + #### A masked pattern was here #### + Edges: +- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) ++ Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 5 (XPROD_EDGE), Reducer 7 (XPROD_EDGE) 
+ Reducer 4 <- Map 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) +@@ -5933,7 +5933,7 @@ STAGE PLANS: + #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) +- Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) ++ Reducer 4 <- Map 3 (XPROD_EDGE), Reducer 7 (XPROD_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) + #### A masked pattern was here #### +@@ -6145,7 +6145,7 @@ STAGE PLANS: + #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) +- Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) ++ Reducer 4 <- Map 3 (XPROD_EDGE), Reducer 7 (XPROD_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) + #### A masked pattern was here #### +@@ -6315,7 +6315,7 @@ STAGE PLANS: + #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) +- Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) ++ Reducer 3 <- Map 1 (XPROD_EDGE), Reducer 6 (XPROD_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) + #### A masked pattern was here #### +@@ -6729,7 +6729,7 @@ STAGE PLANS: + #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +- Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) ++ Reducer 3 <- Reducer 2 (XPROD_EDGE), Reducer 5 (XPROD_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) + #### A masked pattern was here #### + Vertices: +@@ -6854,7 +6854,7 @@ STAGE PLANS: + #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +- Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) ++ Reducer 3 <- Reducer 2 (XPROD_EDGE), Reducer 4 (XPROD_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE) + #### A masked pattern was here #### + Vertices: +diff --git 
a/ql/src/test/results/clientpositive/llap/subquery_select.q.out b/ql/src/test/results/clientpositive/llap/subquery_select.q.out +index 7175be0c..118f6eb 100644 +--- a/ql/src/test/results/clientpositive/llap/subquery_select.q.out ++++ b/ql/src/test/results/clientpositive/llap/subquery_select.q.out +@@ -16,7 +16,7 @@ STAGE PLANS: + Tez + #### A masked pattern was here #### + Edges: +- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) ++ Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 4 (XPROD_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 1 (CUSTOM_SIMPLE_EDGE) +@@ -402,7 +402,7 @@ STAGE PLANS: + Tez + #### A masked pattern was here #### + Edges: +- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) ++ Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 4 (XPROD_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 1 (CUSTOM_SIMPLE_EDGE) +@@ -2153,7 +2153,7 @@ STAGE PLANS: + Tez + #### A masked pattern was here #### + Edges: +- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) ++ Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 5 (XPROD_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) + Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 4 (SIMPLE_EDGE) +@@ -2863,11 +2863,11 @@ POSTHOOK: Input: default@part + 6 false + 18 false + 45 false +-Warning: Shuffle Join MERGEJOIN[29][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +-Warning: Shuffle Join MERGEJOIN[30][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +-PREHOOK: query: explain select p_type, (select p_size from part order by p_size limit 1) = 1 from part ++Warning: Shuffle Join MERGEJOIN[31][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product ++Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] 
in Stage 'Reducer 3' is a cross product ++PREHOOK: query: explain select p_type, (select p_size from part order by p_size limit 1) = 1 from part order by p_type + PREHOOK: type: QUERY +-POSTHOOK: query: explain select p_type, (select p_size from part order by p_size limit 1) = 1 from part ++POSTHOOK: query: explain select p_type, (select p_size from part order by p_size limit 1) = 1 from part order by p_type + POSTHOOK: type: QUERY + STAGE DEPENDENCIES: + Stage-1 is a root stage +@@ -2878,11 +2878,12 @@ STAGE PLANS: + Tez + #### A masked pattern was here #### + Edges: +- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) +- Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) +- Reducer 5 <- Map 4 (SIMPLE_EDGE) +- Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) +- Reducer 7 <- Map 4 (SIMPLE_EDGE) ++ Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 7 (XPROD_EDGE) ++ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE), Reducer 8 (CUSTOM_SIMPLE_EDGE) ++ Reducer 4 <- Reducer 3 (SIMPLE_EDGE) ++ Reducer 6 <- Map 5 (SIMPLE_EDGE) ++ Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) ++ Reducer 8 <- Map 5 (SIMPLE_EDGE) + #### A masked pattern was here #### + Vertices: + Map 1 +@@ -2900,7 +2901,7 @@ STAGE PLANS: + value expressions: _col0 (type: string) + Execution mode: llap + LLAP IO: no inputs +- Map 4 ++ Map 5 + Map Operator Tree: + TableScan + alias: part +@@ -2950,14 +2951,26 @@ STAGE PLANS: + expressions: _col0 (type: string), (_col2 = 1) (type: boolean) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE +- File Output Operator +- compressed: false ++ Reduce Output Operator ++ key expressions: _col0 (type: string) ++ sort order: + + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE +- table: +- input format: org.apache.hadoop.mapred.SequenceFileInputFormat +- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat +- serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +- Reducer 5 ++ value expressions: _col1 (type: boolean) ++ Reducer 4 ++ Execution mode: llap ++ Reduce Operator Tree: ++ Select Operator ++ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean) ++ outputColumnNames: _col0, _col1 ++ Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE ++ File Output Operator ++ compressed: false ++ Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE ++ table: ++ input format: org.apache.hadoop.mapred.SequenceFileInputFormat ++ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat ++ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe ++ Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Select Operator +@@ -2974,7 +2987,7 @@ STAGE PLANS: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) +- Reducer 6 ++ Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator +@@ -2990,7 +3003,7 @@ STAGE PLANS: + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE +- Reducer 7 ++ Reducer 8 + Execution mode: llap + Reduce Operator Tree: + Select Operator +@@ -3011,42 +3024,42 @@ STAGE PLANS: + Processor Tree: + ListSink + +-Warning: Shuffle Join MERGEJOIN[29][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +-Warning: Shuffle Join MERGEJOIN[30][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +-PREHOOK: query: select p_type, (select p_size from part order by p_size limit 1) = 1 from part ++Warning: Shuffle Join MERGEJOIN[31][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product ++Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product ++PREHOOK: query: select p_type, (select p_size 
from part order by p_size limit 1) = 1 from part order by p_type + PREHOOK: type: QUERY + PREHOOK: Input: default@part + #### A masked pattern was here #### +-POSTHOOK: query: select p_type, (select p_size from part order by p_size limit 1) = 1 from part ++POSTHOOK: query: select p_type, (select p_size from part order by p_size limit 1) = 1 from part order by p_type + POSTHOOK: type: QUERY + POSTHOOK: Input: default@part + #### A masked pattern was here #### +-PROMO PLATED TIN true +-PROMO PLATED TIN true ++ECONOMY BRUSHED COPPER true ++ECONOMY BURNISHED STEEL true ++ECONOMY PLATED COPPER true ++ECONOMY POLISHED STEEL true ++LARGE BRUSHED BRASS true + LARGE BRUSHED STEEL true +-PROMO BURNISHED NICKEL true +-STANDARD ANODIZED STEEL true + LARGE BURNISHED STEEL true + MEDIUM ANODIZED COPPER true +-ECONOMY POLISHED STEEL true ++MEDIUM BURNISHED BRASS true + MEDIUM BURNISHED COPPER true +-SMALL POLISHED NICKEL true +-STANDARD PLATED TIN true ++MEDIUM BURNISHED TIN true + MEDIUM BURNISHED TIN true + PROMO ANODIZED TIN true +-MEDIUM BURNISHED BRASS true +-ECONOMY PLATED COPPER true +-STANDARD POLISHED STEEL true +-SMALL BRUSHED BRASS true ++PROMO BURNISHED NICKEL true ++PROMO PLATED TIN true ++PROMO PLATED TIN true + PROMO POLISHED STEEL true ++SMALL BRUSHED BRASS true ++SMALL PLATED BRASS true + SMALL PLATED STEEL true +-ECONOMY BRUSHED COPPER true ++SMALL POLISHED NICKEL true ++STANDARD ANODIZED STEEL true + STANDARD ANODIZED TIN true +-MEDIUM BURNISHED TIN true + STANDARD BURNISHED TIN true +-SMALL PLATED BRASS true +-ECONOMY BURNISHED STEEL true +-LARGE BRUSHED BRASS true ++STANDARD PLATED TIN true ++STANDARD POLISHED STEEL true + Warning: Shuffle Join MERGEJOIN[63][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product + PREHOOK: query: EXPLAIN SELECT p_size, p_size IN ( + SELECT MAX(p_size) FROM part p where p.p_type = part.p_type) AND +@@ -3071,7 +3084,7 @@ STAGE PLANS: + Reducer 11 <- Map 9 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 
(SIMPLE_EDGE), Reducer 7 (ONE_TO_ONE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +- Reducer 4 <- Reducer 10 (CUSTOM_SIMPLE_EDGE), Reducer 3 (CUSTOM_SIMPLE_EDGE) ++ Reducer 4 <- Reducer 10 (XPROD_EDGE), Reducer 3 (XPROD_EDGE) + Reducer 5 <- Reducer 11 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) + Reducer 8 <- Map 6 (SIMPLE_EDGE) +@@ -3507,7 +3520,7 @@ STAGE PLANS: + #### A masked pattern was here #### + Edges: + Reducer 10 <- Reducer 9 (SIMPLE_EDGE) +- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) ++ Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 7 (XPROD_EDGE) + Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 11 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) +@@ -4182,7 +4195,7 @@ STAGE PLANS: + #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +- Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) ++ Reducer 3 <- Map 1 (XPROD_EDGE), Reducer 5 (XPROD_EDGE) + Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) + #### A masked pattern was here #### +diff --git a/ql/src/test/results/clientpositive/llap/tez_self_join.q.out b/ql/src/test/results/clientpositive/llap/tez_self_join.q.out +index 01b822d..c5f4612 100644 +--- a/ql/src/test/results/clientpositive/llap/tez_self_join.q.out ++++ b/ql/src/test/results/clientpositive/llap/tez_self_join.q.out +@@ -70,7 +70,7 @@ STAGE PLANS: + Tez + #### A masked pattern was here #### + Edges: +- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) ++ Reducer 2 <- Map 1 (XPROD_EDGE), Map 4 (XPROD_EDGE) + Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + #### A masked pattern was here #### + Vertices: +diff --git a/ql/src/test/results/clientpositive/llap/vector_between_columns.q.out 
b/ql/src/test/results/clientpositive/llap/vector_between_columns.q.out +index a0b3482..13bc6dd 100644 +--- a/ql/src/test/results/clientpositive/llap/vector_between_columns.q.out ++++ b/ql/src/test/results/clientpositive/llap/vector_between_columns.q.out +@@ -60,7 +60,7 @@ POSTHOOK: Output: default@TINT + POSTHOOK: Lineage: tint.cint SIMPLE [(tint_txt)tint_txt.FieldSchema(name:cint, type:int, comment:null), ] + POSTHOOK: Lineage: tint.rnum SIMPLE [(tint_txt)tint_txt.FieldSchema(name:rnum, type:int, comment:null), ] + tint_txt.rnum tint_txt.cint +-Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 1' is a cross product ++Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product + PREHOOK: query: explain vectorization expression + select tint.rnum, tsint.rnum, tint.cint, tsint.csint, (case when (tint.cint between tsint.csint and tsint.csint) then "Ok" else "NoOk" end) as between_col from tint , tsint + PREHOOK: type: QUERY +@@ -81,7 +81,7 @@ STAGE PLANS: + Tez + #### A masked pattern was here #### + Edges: +- Map 1 <- Map 2 (BROADCAST_EDGE) ++ Reducer 2 <- Map 1 (XPROD_EDGE), Map 3 (XPROD_EDGE) + #### A masked pattern was here #### + Vertices: + Map 1 +@@ -100,39 +100,14 @@ STAGE PLANS: + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE +- Map Join Operator +- condition map: +- Inner Join 0 to 1 +- keys: +- 0 +- 1 +- Map Join Vectorization: +- className: VectorMapJoinInnerMultiKeyOperator ++ Reduce Output Operator ++ sort order: ++ Reduce Sink Vectorization: ++ className: VectorReduceSinkEmptyKeyOperator + native: true +- nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true +- outputColumnNames: 
_col0, _col1, _col2, _col3 +- input vertices: +- 1 Map 2 +- Statistics: Num rows: 25 Data size: 425 Basic stats: COMPLETE Column stats: NONE +- Select Operator +- expressions: _col0 (type: int), _col2 (type: int), _col1 (type: int), _col3 (type: smallint), CASE WHEN (_col1 BETWEEN UDFToInteger(_col3) AND UDFToInteger(_col3)) THEN ('Ok') ELSE ('NoOk') END (type: string) +- outputColumnNames: _col0, _col1, _col2, _col3, _col4 +- Select Vectorization: +- className: VectorSelectOperator +- native: true +- projectedOutputColumns: [0, 2, 1, 3, 5] +- selectExpressions: IfExprStringScalarStringScalar(col 4, val Ok, val NoOk)(children: VectorUDFAdaptor(_col1 BETWEEN UDFToInteger(_col3) AND UDFToInteger(_col3))(children: col 3, col 3) -> 4:boolean) -> 5:String +- Statistics: Num rows: 25 Data size: 425 Basic stats: COMPLETE Column stats: NONE +- File Output Operator +- compressed: false +- File Sink Vectorization: +- className: VectorFileSinkOperator +- native: false +- Statistics: Num rows: 25 Data size: 425 Basic stats: COMPLETE Column stats: NONE +- table: +- input format: org.apache.hadoop.mapred.SequenceFileInputFormat +- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat +- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe ++ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true ++ Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE ++ value expressions: _col0 (type: int), _col1 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: +@@ -140,10 +115,10 @@ STAGE PLANS: + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +- allNative: false +- usesVectorUDFAdaptor: 
true ++ allNative: true ++ usesVectorUDFAdaptor: false + vectorized: true +- Map 2 ++ Map 3 + Map Operator Tree: + TableScan + alias: tsint +@@ -177,6 +152,28 @@ STAGE PLANS: + allNative: true + usesVectorUDFAdaptor: false + vectorized: true ++ Reducer 2 ++ Execution mode: llap ++ Reduce Operator Tree: ++ Merge Join Operator ++ condition map: ++ Inner Join 0 to 1 ++ keys: ++ 0 ++ 1 ++ outputColumnNames: _col0, _col1, _col2, _col3 ++ Statistics: Num rows: 25 Data size: 425 Basic stats: COMPLETE Column stats: NONE ++ Select Operator ++ expressions: _col0 (type: int), _col2 (type: int), _col1 (type: int), _col3 (type: smallint), CASE WHEN (_col1 BETWEEN UDFToInteger(_col3) AND UDFToInteger(_col3)) THEN ('Ok') ELSE ('NoOk') END (type: string) ++ outputColumnNames: _col0, _col1, _col2, _col3, _col4 ++ Statistics: Num rows: 25 Data size: 425 Basic stats: COMPLETE Column stats: NONE ++ File Output Operator ++ compressed: false ++ Statistics: Num rows: 25 Data size: 425 Basic stats: COMPLETE Column stats: NONE ++ table: ++ input format: org.apache.hadoop.mapred.SequenceFileInputFormat ++ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat ++ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator +@@ -184,7 +181,7 @@ STAGE PLANS: + Processor Tree: + ListSink + +-Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 1' is a cross product ++Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product + PREHOOK: query: select tint.rnum, tsint.rnum, tint.cint, tsint.csint, (case when (tint.cint between tsint.csint and tsint.csint) then "Ok" else "NoOk" end) as between_col from tint , tsint + PREHOOK: type: QUERY + PREHOOK: Input: default@tint +@@ -221,7 +218,7 @@ tint.rnum tsint.rnum tint.cint tsint.csint between_col + 4 2 10 0 NoOk + 4 3 10 1 NoOk + 4 4 10 10 Ok +-Warning: Map Join MAPJOIN[10][bigTable=?] 
in task 'Map 1' is a cross product ++Warning: Shuffle Join MERGEJOIN[10][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product + PREHOOK: query: explain vectorization expression + select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint + PREHOOK: type: QUERY +@@ -242,7 +239,7 @@ STAGE PLANS: + Tez + #### A masked pattern was here #### + Edges: +- Map 1 <- Map 2 (BROADCAST_EDGE) ++ Reducer 2 <- Map 1 (XPROD_EDGE), Map 3 (XPROD_EDGE) + #### A masked pattern was here #### + Vertices: + Map 1 +@@ -261,45 +258,14 @@ STAGE PLANS: + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE +- Map Join Operator +- condition map: +- Inner Join 0 to 1 +- keys: +- 0 +- 1 +- Map Join Vectorization: +- className: VectorMapJoinInnerMultiKeyOperator ++ Reduce Output Operator ++ sort order: ++ Reduce Sink Vectorization: ++ className: VectorReduceSinkEmptyKeyOperator + native: true +- nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true +- outputColumnNames: _col0, _col1, _col2, _col3 +- input vertices: +- 1 Map 2 +- Statistics: Num rows: 25 Data size: 425 Basic stats: COMPLETE Column stats: NONE +- Filter Operator +- Filter Vectorization: +- className: VectorFilterOperator +- native: true +- predicateExpression: SelectColumnIsTrue(col 4)(children: VectorUDFAdaptor(_col1 BETWEEN UDFToInteger(_col3) AND UDFToInteger(_col3))(children: col 3, col 3) -> 4:boolean) -> boolean +- predicate: _col1 BETWEEN UDFToInteger(_col3) AND UDFToInteger(_col3) (type: boolean) +- Statistics: Num rows: 2 Data size: 34 Basic stats: COMPLETE Column stats: NONE +- Select Operator +- expressions: _col0 (type: 
int), _col2 (type: int), _col1 (type: int), _col3 (type: smallint) +- outputColumnNames: _col0, _col1, _col2, _col3 +- Select Vectorization: +- className: VectorSelectOperator +- native: true +- projectedOutputColumns: [0, 2, 1, 3] +- Statistics: Num rows: 2 Data size: 34 Basic stats: COMPLETE Column stats: NONE +- File Output Operator +- compressed: false +- File Sink Vectorization: +- className: VectorFileSinkOperator +- native: false +- Statistics: Num rows: 2 Data size: 34 Basic stats: COMPLETE Column stats: NONE +- table: +- input format: org.apache.hadoop.mapred.SequenceFileInputFormat +- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat +- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe ++ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true ++ Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE ++ value expressions: _col0 (type: int), _col1 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: +@@ -307,10 +273,10 @@ STAGE PLANS: + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +- allNative: false +- usesVectorUDFAdaptor: true ++ allNative: true ++ usesVectorUDFAdaptor: false + vectorized: true +- Map 2 ++ Map 3 + Map Operator Tree: + TableScan + alias: tsint +@@ -344,6 +310,31 @@ STAGE PLANS: + allNative: true + usesVectorUDFAdaptor: false + vectorized: true ++ Reducer 2 ++ Execution mode: llap ++ Reduce Operator Tree: ++ Merge Join Operator ++ condition map: ++ Inner Join 0 to 1 ++ keys: ++ 0 ++ 1 ++ outputColumnNames: _col0, _col1, _col2, _col3 ++ Statistics: Num rows: 25 Data size: 425 Basic stats: COMPLETE Column stats: NONE ++ Filter 
Operator ++ predicate: _col1 BETWEEN UDFToInteger(_col3) AND UDFToInteger(_col3) (type: boolean) ++ Statistics: Num rows: 2 Data size: 34 Basic stats: COMPLETE Column stats: NONE ++ Select Operator ++ expressions: _col0 (type: int), _col2 (type: int), _col1 (type: int), _col3 (type: smallint) ++ outputColumnNames: _col0, _col1, _col2, _col3 ++ Statistics: Num rows: 2 Data size: 34 Basic stats: COMPLETE Column stats: NONE ++ File Output Operator ++ compressed: false ++ Statistics: Num rows: 2 Data size: 34 Basic stats: COMPLETE Column stats: NONE ++ table: ++ input format: org.apache.hadoop.mapred.SequenceFileInputFormat ++ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat ++ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator +@@ -351,7 +342,7 @@ STAGE PLANS: + Processor Tree: + ListSink + +-Warning: Map Join MAPJOIN[10][bigTable=?] in task 'Map 1' is a cross product ++Warning: Shuffle Join MERGEJOIN[10][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product + PREHOOK: query: select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint + PREHOOK: type: QUERY + PREHOOK: Input: default@tint +diff --git a/ql/src/test/results/clientpositive/llap/vector_complex_all.q.out b/ql/src/test/results/clientpositive/llap/vector_complex_all.q.out +index 2f3f886..2268a15 100644 +--- a/ql/src/test/results/clientpositive/llap/vector_complex_all.q.out ++++ b/ql/src/test/results/clientpositive/llap/vector_complex_all.q.out +@@ -610,7 +610,7 @@ b str + two line1 + four line2 + six line3 +-Warning: Map Join MAPJOIN[15][bigTable=?] 
in task 'Map 4' is a cross product ++Warning: Shuffle Join MERGEJOIN[15][tables = [$hdt$_1, $hdt$_2, $hdt$_3, $hdt$_0]] in Stage 'Reducer 2' is a cross product + PREHOOK: query: EXPLAIN VECTORIZATION DETAIL + INSERT INTO TABLE orc_create_complex + SELECT orc_create_staging.*, src1.key FROM orc_create_staging cross join src src1 cross join orc_create_staging spam1 cross join orc_create_staging spam2 +@@ -635,7 +635,7 @@ STAGE PLANS: + Tez + #### A masked pattern was here #### + Edges: +- Map 4 <- Map 1 (BROADCAST_EDGE), Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE) ++ Reducer 2 <- Map 1 (XPROD_EDGE), Map 3 (XPROD_EDGE), Map 4 (XPROD_EDGE), Map 5 (XPROD_EDGE) + #### A masked pattern was here #### + Vertices: + Map 1 +@@ -678,7 +678,7 @@ STAGE PLANS: + includeColumns: [0, 1, 2, 3] + dataColumns: str:string, mp:map, lst:array, strct:struct + partitionColumnCount: 0 +- Map 2 ++ Map 3 + Map Operator Tree: + TableScan + alias: spam2 +@@ -715,7 +715,7 @@ STAGE PLANS: + includeColumns: [] + dataColumns: str:string, mp:map, lst:array, strct:struct + partitionColumnCount: 0 +- Map 3 ++ Map 4 + Map Operator Tree: + TableScan + alias: spam1 +@@ -752,7 +752,7 @@ STAGE PLANS: + includeColumns: [] + dataColumns: str:string, mp:map, lst:array, strct:struct + partitionColumnCount: 0 +- Map 4 ++ Map 5 + Map Operator Tree: + TableScan + alias: src1 +@@ -768,53 +768,23 @@ STAGE PLANS: + native: true + projectedOutputColumns: [0] + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE +- Map Join Operator +- condition map: +- Inner Join 0 to 1 +- Inner Join 0 to 2 +- Inner Join 0 to 3 +- keys: +- 0 +- 1 +- 2 +- 3 +- Map Join Vectorization: +- className: VectorMapJoinOperator +- native: false +- nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key 
Types IS true +- nativeConditionsNotMet: One MapJoin Condition IS false +- outputColumnNames: _col0, _col1, _col2, _col3, _col6 +- input vertices: +- 0 Map 1 +- 1 Map 2 +- 2 Map 3 +- Statistics: Num rows: 500 Data size: 1768000 Basic stats: COMPLETE Column stats: NONE +- Select Operator +- expressions: _col0 (type: string), _col1 (type: map), _col2 (type: array), _col3 (type: struct), _col6 (type: string) +- outputColumnNames: _col0, _col1, _col2, _col3, _col4 +- Select Vectorization: +- className: VectorSelectOperator +- native: true +- projectedOutputColumns: [0, 1, 2, 3, 4] +- Statistics: Num rows: 500 Data size: 1768000 Basic stats: COMPLETE Column stats: NONE +- File Output Operator +- compressed: false +- File Sink Vectorization: +- className: VectorFileSinkOperator +- native: false +- Statistics: Num rows: 500 Data size: 1768000 Basic stats: COMPLETE Column stats: NONE +- table: +- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde +- name: default.orc_create_complex ++ Reduce Output Operator ++ sort order: ++ Reduce Sink Vectorization: ++ className: VectorReduceSinkEmptyKeyOperator ++ keyColumns: [] ++ native: true ++ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true ++ valueColumns: [0] ++ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE ++ value expressions: _col0 (type: string) + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat +- allNative: false ++ allNative: true + usesVectorUDFAdaptor: false + 
vectorized: true + rowBatchContext: +@@ -822,7 +792,33 @@ STAGE PLANS: + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 +- scratchColumnTypeNames: string, map, array, struct ++ Reducer 2 ++ Execution mode: llap ++ Reduce Operator Tree: ++ Merge Join Operator ++ condition map: ++ Inner Join 0 to 1 ++ Inner Join 0 to 2 ++ Inner Join 0 to 3 ++ keys: ++ 0 ++ 1 ++ 2 ++ 3 ++ outputColumnNames: _col0, _col1, _col2, _col3, _col6 ++ Statistics: Num rows: 500 Data size: 1768000 Basic stats: COMPLETE Column stats: NONE ++ Select Operator ++ expressions: _col0 (type: string), _col1 (type: map), _col2 (type: array), _col3 (type: struct), _col6 (type: string) ++ outputColumnNames: _col0, _col1, _col2, _col3, _col4 ++ Statistics: Num rows: 500 Data size: 1768000 Basic stats: COMPLETE Column stats: NONE ++ File Output Operator ++ compressed: false ++ Statistics: Num rows: 500 Data size: 1768000 Basic stats: COMPLETE Column stats: NONE ++ table: ++ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat ++ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat ++ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde ++ name: default.orc_create_complex + + Stage: Stage-2 + Dependency Collection +@@ -840,7 +836,7 @@ STAGE PLANS: + Stage: Stage-3 + Stats-Aggr Operator + +-Warning: Map Join MAPJOIN[15][bigTable=?] 
in task 'Map 4' is a cross product ++Warning: Shuffle Join MERGEJOIN[15][tables = [$hdt$_1, $hdt$_2, $hdt$_3, $hdt$_0]] in Stage 'Reducer 2' is a cross product + PREHOOK: query: INSERT INTO TABLE orc_create_complex + SELECT orc_create_staging.*, src1.key FROM orc_create_staging cross join src src1 cross join orc_create_staging spam1 cross join orc_create_staging spam2 + PREHOOK: type: QUERY +diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out +index e43b4d1..e644f14 100644 +--- a/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out ++++ b/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out +@@ -1,4 +1,4 @@ +-Warning: Map Join MAPJOIN[27][bigTable=?] in task 'Map 1' is a cross product ++Warning: Shuffle Join MERGEJOIN[27][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product + PREHOOK: query: explain vectorization expression + select * + from src +@@ -26,10 +26,10 @@ STAGE PLANS: + Tez + #### A masked pattern was here #### + Edges: +- Map 1 <- Reducer 4 (BROADCAST_EDGE), Reducer 5 (BROADCAST_EDGE) +- Reducer 2 <- Map 1 (SIMPLE_EDGE) +- Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) +- Reducer 5 <- Map 3 (SIMPLE_EDGE) ++ Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 5 (XPROD_EDGE), Reducer 6 (BROADCAST_EDGE) ++ Reducer 3 <- Reducer 2 (SIMPLE_EDGE) ++ Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) ++ Reducer 6 <- Map 4 (SIMPLE_EDGE) + #### A masked pattern was here #### + Vertices: + Map 1 +@@ -48,58 +48,14 @@ STAGE PLANS: + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE +- Map Join Operator +- condition map: +- Inner Join 0 to 1 +- keys: +- 0 +- 1 +- Map Join Vectorization: +- className: VectorMapJoinInnerMultiKeyOperator ++ Reduce Output Operator ++ sort order: ++ Reduce Sink Vectorization: ++ className: VectorReduceSinkEmptyKeyOperator + native: true 
+- nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true +- outputColumnNames: _col0, _col1, _col2, _col3 +- input vertices: +- 1 Reducer 4 +- Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE +- Map Join Operator +- condition map: +- Left Outer Join 0 to 1 +- keys: +- 0 _col0 (type: string) +- 1 _col0 (type: string) +- Map Join Vectorization: +- className: VectorMapJoinOuterStringOperator +- native: true +- nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true +- outputColumnNames: _col0, _col1, _col2, _col3, _col5 +- input vertices: +- 1 Reducer 5 +- Statistics: Num rows: 500 Data size: 98620 Basic stats: COMPLETE Column stats: COMPLETE +- Filter Operator +- Filter Vectorization: +- className: VectorFilterOperator +- native: true +- predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 2, val 0) -> boolean, FilterExprAndExpr(children: SelectColumnIsNull(col 4) -> boolean, SelectColumnIsNotNull(col 0) -> boolean, FilterLongColGreaterEqualLongColumn(col 3, col 2) -> boolean) -> boolean) -> boolean +- predicate: ((_col2 = 0) or (_col5 is null and _col0 is not null and (_col3 >= _col2))) (type: boolean) +- Statistics: Num rows: 500 Data size: 98620 Basic stats: COMPLETE Column stats: COMPLETE +- Select Operator +- expressions: _col0 (type: string), _col1 (type: string) +- outputColumnNames: _col0, _col1 +- Select Vectorization: +- className: VectorSelectOperator +- native: true +- projectedOutputColumns: [0, 1] +- 
Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE +- Reduce Output Operator +- key expressions: _col0 (type: string) +- sort order: + +- Reduce Sink Vectorization: +- className: VectorReduceSinkObjectHashOperator +- native: true +- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true +- Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE +- value expressions: _col1 (type: string) ++ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true ++ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE ++ value expressions: _col0 (type: string), _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: +@@ -110,7 +66,7 @@ STAGE PLANS: + allNative: true + usesVectorUDFAdaptor: false + vectorized: true +- Map 3 ++ Map 4 + Map Operator Tree: + TableScan + alias: src +@@ -180,6 +136,39 @@ STAGE PLANS: + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 ++ Execution mode: llap ++ Reduce Operator Tree: ++ Merge Join Operator ++ condition map: ++ Inner Join 0 to 1 ++ keys: ++ 0 ++ 1 ++ outputColumnNames: _col0, _col1, _col2, _col3 ++ Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE ++ Map Join Operator ++ condition map: ++ Left Outer Join 0 to 1 ++ keys: ++ 0 _col0 (type: string) ++ 1 _col0 (type: string) ++ outputColumnNames: _col0, _col1, _col2, _col3, _col5 ++ input vertices: ++ 1 Reducer 6 ++ Statistics: Num rows: 500 Data size: 98620 Basic stats: COMPLETE Column stats: COMPLETE ++ Filter 
Operator ++ predicate: ((_col2 = 0) or (_col5 is null and _col0 is not null and (_col3 >= _col2))) (type: boolean) ++ Statistics: Num rows: 500 Data size: 98620 Basic stats: COMPLETE Column stats: COMPLETE ++ Select Operator ++ expressions: _col0 (type: string), _col1 (type: string) ++ outputColumnNames: _col0, _col1 ++ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE ++ Reduce Output Operator ++ key expressions: _col0 (type: string) ++ sort order: + ++ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE ++ value expressions: _col1 (type: string) ++ Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true +@@ -207,7 +196,7 @@ STAGE PLANS: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +- Reducer 4 ++ Reducer 5 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true +@@ -238,7 +227,7 @@ STAGE PLANS: + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) +- Reducer 5 ++ Reducer 6 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true +@@ -287,7 +276,7 @@ STAGE PLANS: + Processor Tree: + ListSink + +-Warning: Map Join MAPJOIN[27][bigTable=?] 
in task 'Map 1' is a cross product ++Warning: Shuffle Join MERGEJOIN[27][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product + PREHOOK: query: select * + from src + where not key in +@@ -316,7 +305,7 @@ POSTHOOK: Output: database:default + POSTHOOK: Output: default@orcsrc + POSTHOOK: Lineage: orcsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] + POSTHOOK: Lineage: orcsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +-Warning: Map Join MAPJOIN[27][bigTable=?] in task 'Map 1' is a cross product ++Warning: Shuffle Join MERGEJOIN[27][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product + PREHOOK: query: select * + from orcsrc + where not key in +@@ -333,7 +322,7 @@ order by key + POSTHOOK: type: QUERY + POSTHOOK: Input: default@orcsrc + #### A masked pattern was here #### +-Warning: Map Join MAPJOIN[27][bigTable=?] in task 'Map 1' is a cross product ++Warning: Shuffle Join MERGEJOIN[27][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product + PREHOOK: query: select * + from orcsrc + where not key in +diff --git a/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out b/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out +index 1a492b6..4e5205f 100644 +--- a/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out ++++ b/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out +@@ -26,7 +26,7 @@ POSTHOOK: Output: database:default + POSTHOOK: Output: default@myinput1 + POSTHOOK: Lineage: myinput1.key SIMPLE [(myinput1_txt)myinput1_txt.FieldSchema(name:key, type:int, comment:null), ] + POSTHOOK: Lineage: myinput1.value SIMPLE [(myinput1_txt)myinput1_txt.FieldSchema(name:value, type:int, comment:null), ] +-Warning: Map Join MAPJOIN[18][bigTable=?] 
in task 'Map 1' is a cross product ++Warning: Shuffle Join MERGEJOIN[18][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product + PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value + PREHOOK: type: QUERY + PREHOOK: Input: default@myinput1 +diff --git a/ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out b/ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out +index 3497164..056360f 100644 +--- a/ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out ++++ b/ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out +@@ -26,7 +26,7 @@ POSTHOOK: Output: database:default + POSTHOOK: Output: default@myinput1 + POSTHOOK: Lineage: myinput1.key SIMPLE [(myinput1_txt)myinput1_txt.FieldSchema(name:key, type:int, comment:null), ] + POSTHOOK: Lineage: myinput1.value SIMPLE [(myinput1_txt)myinput1_txt.FieldSchema(name:value, type:int, comment:null), ] +-Warning: Map Join MAPJOIN[14][bigTable=?] 
in task 'Map 1' is a cross product ++Warning: Shuffle Join MERGEJOIN[14][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product + PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b + PREHOOK: type: QUERY + PREHOOK: Input: default@myinput1 +diff --git a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out +index 16cae79..7a4fe36 100644 +--- a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out ++++ b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out +@@ -2375,7 +2375,7 @@ STAGE PLANS: + Tez + #### A masked pattern was here #### + Edges: +- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) ++ Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 4 (XPROD_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE) + #### A masked pattern was here #### +@@ -2524,7 +2524,7 @@ STAGE PLANS: + Tez + #### A masked pattern was here #### + Edges: +- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) ++ Reducer 2 <- Map 1 (XPROD_EDGE), Map 4 (XPROD_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + #### A masked pattern was here #### + Vertices: +@@ -5507,7 +5507,7 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 + POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 + #### A masked pattern was here #### + 1000 +-Warning: Map Join MAPJOIN[22][bigTable=?] 
in task 'Reducer 3' is a cross product ++Warning: Shuffle Join MERGEJOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product + PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08' + PREHOOK: type: QUERY + POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08' +@@ -5525,8 +5525,9 @@ STAGE PLANS: + Tez + #### A masked pattern was here #### + Edges: +- Reducer 3 <- Map 1 (BROADCAST_EDGE), Map 2 (SIMPLE_EDGE) +- Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) ++ Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 4 (XPROD_EDGE) ++ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) ++ Reducer 4 <- Map 1 (SIMPLE_EDGE) + #### A masked pattern was here #### + Vertices: + Map 1 +@@ -5540,24 +5541,6 @@ STAGE PLANS: + Reduce Output Operator + sort order: + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE +- Execution mode: vectorized, llap +- LLAP IO: no inputs +- Map Vectorization: +- enabled: true +- enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true +- groupByVectorOutput: true +- inputFileFormats: org.apache.hadoop.mapred.TextInputFormat +- allNative: true +- usesVectorUDFAdaptor: false +- vectorized: true +- Map 2 +- Map Operator Tree: +- TableScan +- alias: srcpart +- filterExpr: (ds = '2008-04-08') (type: boolean) +- Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE +- Select Operator +- Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: '2008-04-08' (type: string) + mode: hash +@@ -5578,6 +5561,25 @@ STAGE PLANS: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true ++ Reducer 2 ++ Execution mode: llap ++ Reduce Operator Tree: ++ Merge Join 
Operator ++ condition map: ++ Inner Join 0 to 1 ++ keys: ++ 0 ++ 1 ++ Statistics: Num rows: 500000 Data size: 11124000 Basic stats: COMPLETE Column stats: NONE ++ Group By Operator ++ aggregations: count() ++ mode: hash ++ outputColumnNames: _col0 ++ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE ++ Reduce Output Operator ++ sort order: ++ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE ++ value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: +@@ -5589,30 +5591,17 @@ STAGE PLANS: + vectorized: true + Reduce Operator Tree: + Group By Operator +- keys: KEY._col0 (type: string) ++ aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 +- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +- Select Operator +- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +- Map Join Operator +- condition map: +- Inner Join 0 to 1 +- keys: +- 0 +- 1 +- input vertices: +- 0 Map 1 +- Statistics: Num rows: 500000 Data size: 11124000 Basic stats: COMPLETE Column stats: NONE +- Group By Operator +- aggregations: count() +- mode: hash +- outputColumnNames: _col0 +- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +- Reduce Output Operator +- sort order: +- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +- value expressions: _col0 (type: bigint) ++ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE ++ File Output Operator ++ compressed: false ++ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE ++ table: ++ input format: org.apache.hadoop.mapred.SequenceFileInputFormat ++ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat ++ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce 
Vectorization: +@@ -5624,17 +5613,15 @@ STAGE PLANS: + vectorized: true + Reduce Operator Tree: + Group By Operator +- aggregations: count(VALUE._col0) ++ keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 +- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +- File Output Operator +- compressed: false +- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +- table: +- input format: org.apache.hadoop.mapred.SequenceFileInputFormat +- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat +- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe ++ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE ++ Select Operator ++ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE ++ Reduce Output Operator ++ sort order: ++ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + + Stage: Stage-0 + Fetch Operator +@@ -5642,7 +5629,7 @@ STAGE PLANS: + Processor Tree: + ListSink + +-Warning: Map Join MAPJOIN[22][bigTable=?] in task 'Reducer 3' is a cross product ++Warning: Shuffle Join MERGEJOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product + PREHOOK: query: select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08' + PREHOOK: type: QUERY + PREHOOK: Input: default@srcpart +diff --git a/ql/src/test/results/clientpositive/llap/vectorized_multi_output_select.q.out b/ql/src/test/results/clientpositive/llap/vectorized_multi_output_select.q.out +index 2c66856..052fda6 100644 +--- a/ql/src/test/results/clientpositive/llap/vectorized_multi_output_select.q.out ++++ b/ql/src/test/results/clientpositive/llap/vectorized_multi_output_select.q.out +@@ -1,4 +1,4 @@ +-Warning: Map Join MAPJOIN[43][bigTable=?] 
in task 'Reducer 2' is a cross product ++Warning: Shuffle Join MERGEJOIN[43][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product + PREHOOK: query: explain + select * from ( + select count(*) as h8_30_to_9 +@@ -32,9 +32,10 @@ STAGE PLANS: + Tez + #### A masked pattern was here #### + Edges: +- Map 1 <- Map 4 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE) +- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 3 (BROADCAST_EDGE) +- Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE) ++ Map 1 <- Map 5 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE) ++ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) ++ Reducer 3 <- Reducer 2 (XPROD_EDGE), Reducer 4 (XPROD_EDGE) ++ Reducer 4 <- Map 1 (CUSTOM_SIMPLE_EDGE) + #### A masked pattern was here #### + Vertices: + Map 1 +@@ -56,7 +57,7 @@ STAGE PLANS: + 0 _col0 (type: string) + 1 _col0 (type: string) + input vertices: +- 1 Map 4 ++ 1 Map 5 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() +@@ -74,7 +75,7 @@ STAGE PLANS: + 0 _col0 (type: string) + 1 _col0 (type: string) + input vertices: +- 1 Map 5 ++ 1 Map 6 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() +@@ -87,7 +88,7 @@ STAGE PLANS: + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs +- Map 4 ++ Map 5 + Map Operator Tree: + TableScan + alias: src1 +@@ -106,7 +107,7 @@ STAGE PLANS: + Statistics: Num rows: 1 Data size: 177 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs +- Map 5 ++ Map 6 + Map Operator Tree: + TableScan + alias: src1 +@@ -133,24 +134,29 @@ STAGE PLANS: + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE +- Map Join Operator +- condition map: +- Inner Join 0 to 1 +- keys: +- 0 +- 1 +- outputColumnNames: _col0, _col1 +- input vertices: +- 1 Reducer 3 
+- Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE +- File Output Operator +- compressed: false +- Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE +- table: +- input format: org.apache.hadoop.mapred.SequenceFileInputFormat +- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat +- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe ++ Reduce Output Operator ++ sort order: ++ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE ++ value expressions: _col0 (type: bigint) + Reducer 3 ++ Execution mode: llap ++ Reduce Operator Tree: ++ Merge Join Operator ++ condition map: ++ Inner Join 0 to 1 ++ keys: ++ 0 ++ 1 ++ outputColumnNames: _col0, _col1 ++ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE ++ File Output Operator ++ compressed: false ++ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE ++ table: ++ input format: org.apache.hadoop.mapred.SequenceFileInputFormat ++ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat ++ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe ++ Reducer 4 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator +@@ -169,7 +175,7 @@ STAGE PLANS: + Processor Tree: + ListSink + +-Warning: Map Join MAPJOIN[43][bigTable=?] 
in task 'Reducer 2' is a cross product ++Warning: Shuffle Join MERGEJOIN[43][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product + PREHOOK: query: select * from ( + select count(*) as h8_30_to_9 + from src +diff --git a/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out b/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out +index 4dfcc33..a709920 100644 +--- a/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out ++++ b/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out +@@ -1215,7 +1215,7 @@ POSTHOOK: Lineage: decimal_mapjoin.cdecimal1 EXPRESSION [(alltypesorc)alltypesor + POSTHOOK: Lineage: decimal_mapjoin.cdecimal2 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] + POSTHOOK: Lineage: decimal_mapjoin.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] + POSTHOOK: Lineage: decimal_mapjoin.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +-Warning: Map Join MAPJOIN[13][bigTable=?] 
in task 'Map 1' is a cross product ++Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product + PREHOOK: query: EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 + FROM decimal_mapjoin l + JOIN decimal_mapjoin r ON l.cint = r.cint +@@ -1235,7 +1235,7 @@ STAGE PLANS: + Tez + #### A masked pattern was here #### + Edges: +- Map 1 <- Map 2 (BROADCAST_EDGE) ++ Reducer 2 <- Map 1 (XPROD_EDGE), Map 3 (XPROD_EDGE) + #### A masked pattern was here #### + Vertices: + Map 1 +@@ -1250,29 +1250,12 @@ STAGE PLANS: + expressions: cdecimal1 (type: decimal(20,10)) + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 551 Basic stats: COMPLETE Column stats: NONE +- Map Join Operator +- condition map: +- Inner Join 0 to 1 +- keys: +- 0 +- 1 +- outputColumnNames: _col0, _col2 +- input vertices: +- 1 Map 2 +- Statistics: Num rows: 25 Data size: 5535 Basic stats: COMPLETE Column stats: NONE +- Select Operator +- expressions: 6981 (type: int), 6981 (type: int), _col0 (type: decimal(20,10)), _col2 (type: decimal(23,14)) +- outputColumnNames: _col0, _col1, _col2, _col3 +- Statistics: Num rows: 25 Data size: 5535 Basic stats: COMPLETE Column stats: NONE +- File Output Operator +- compressed: false +- Statistics: Num rows: 25 Data size: 5535 Basic stats: COMPLETE Column stats: NONE +- table: +- input format: org.apache.hadoop.mapred.SequenceFileInputFormat +- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat +- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe ++ Reduce Output Operator ++ sort order: ++ Statistics: Num rows: 5 Data size: 551 Basic stats: COMPLETE Column stats: NONE ++ value expressions: _col0 (type: decimal(20,10)) + Execution mode: vectorized +- Map 2 ++ Map 3 + Map Operator Tree: + TableScan + alias: r +@@ -1289,6 +1272,27 @@ STAGE PLANS: + Statistics: Num rows: 5 Data size: 551 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: decimal(23,14)) + Execution 
mode: vectorized ++ Reducer 2 ++ Reduce Operator Tree: ++ Merge Join Operator ++ condition map: ++ Inner Join 0 to 1 ++ keys: ++ 0 ++ 1 ++ outputColumnNames: _col0, _col2 ++ Statistics: Num rows: 25 Data size: 5535 Basic stats: COMPLETE Column stats: NONE ++ Select Operator ++ expressions: 6981 (type: int), 6981 (type: int), _col0 (type: decimal(20,10)), _col2 (type: decimal(23,14)) ++ outputColumnNames: _col0, _col1, _col2, _col3 ++ Statistics: Num rows: 25 Data size: 5535 Basic stats: COMPLETE Column stats: NONE ++ File Output Operator ++ compressed: false ++ Statistics: Num rows: 25 Data size: 5535 Basic stats: COMPLETE Column stats: NONE ++ table: ++ input format: org.apache.hadoop.mapred.SequenceFileInputFormat ++ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat ++ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator +@@ -1296,7 +1300,7 @@ STAGE PLANS: + Processor Tree: + ListSink + +-Warning: Map Join MAPJOIN[13][bigTable=?] 
in task 'Map 1' is a cross product ++Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product + PREHOOK: query: SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 + FROM decimal_mapjoin l + JOIN decimal_mapjoin r ON l.cint = r.cint +@@ -1312,8 +1316,6 @@ POSTHOOK: type: QUERY + POSTHOOK: Input: default@decimal_mapjoin + #### A masked pattern was here #### + 6981 6981 NULL NULL +-6981 6981 NULL -617.56077692307690 +-6981 6981 NULL -617.56077692307690 + 6981 6981 NULL NULL + 6981 6981 NULL NULL + 6981 6981 NULL NULL +@@ -1321,14 +1323,13 @@ POSTHOOK: Input: default@decimal_mapjoin + 6981 6981 NULL NULL + 6981 6981 NULL NULL + 6981 6981 NULL NULL +-6981 6981 NULL NULL + 6981 6981 NULL -617.56077692307690 + 6981 6981 NULL -617.56077692307690 + 6981 6981 NULL NULL + 6981 6981 NULL NULL + 6981 6981 NULL NULL +-6981 6981 NULL 6984454.21109769200000 + 6981 6981 NULL NULL ++6981 6981 NULL 6984454.21109769200000 + 6981 6981 NULL NULL + 6981 6981 NULL NULL + 6981 6981 NULL NULL +@@ -1337,8 +1338,8 @@ POSTHOOK: Input: default@decimal_mapjoin + 6981 6981 NULL NULL + 6981 6981 NULL NULL + 6981 6981 NULL NULL +-6981 6981 NULL 6984454.21109769200000 + 6981 6981 NULL NULL ++6981 6981 NULL 6984454.21109769200000 + 6981 6981 NULL NULL + 6981 6981 NULL NULL + 6981 6981 NULL NULL +@@ -1347,13 +1348,14 @@ POSTHOOK: Input: default@decimal_mapjoin + 6981 6981 NULL NULL + 6981 6981 NULL NULL + 6981 6981 NULL NULL ++6981 6981 NULL NULL + 6981 6981 NULL 6984454.21109769200000 + 6981 6981 NULL NULL + 6981 6981 NULL NULL + 6981 6981 NULL NULL ++6981 6981 NULL -617.56077692307690 ++6981 6981 NULL -617.56077692307690 + 6981 6981 5831542.2692483780 NULL +-6981 6981 5831542.2692483780 -617.56077692307690 +-6981 6981 5831542.2692483780 -617.56077692307690 + 6981 6981 5831542.2692483780 NULL + 6981 6981 5831542.2692483780 NULL + 6981 6981 5831542.2692483780 NULL +@@ -1361,9 +1363,9 @@ POSTHOOK: Input: default@decimal_mapjoin + 6981 6981 5831542.2692483780 
NULL + 6981 6981 5831542.2692483780 NULL + 6981 6981 5831542.2692483780 NULL ++6981 6981 5831542.2692483780 -617.56077692307690 ++6981 6981 5831542.2692483780 -617.56077692307690 + 6981 6981 NULL NULL +-6981 6981 NULL -617.56077692307690 +-6981 6981 NULL -617.56077692307690 + 6981 6981 NULL NULL + 6981 6981 NULL NULL + 6981 6981 NULL NULL +@@ -1371,14 +1373,13 @@ POSTHOOK: Input: default@decimal_mapjoin + 6981 6981 NULL NULL + 6981 6981 NULL NULL + 6981 6981 NULL NULL +-6981 6981 NULL NULL + 6981 6981 NULL -617.56077692307690 + 6981 6981 NULL -617.56077692307690 + 6981 6981 NULL NULL + 6981 6981 NULL NULL + 6981 6981 NULL NULL +-6981 6981 NULL 6984454.21109769200000 + 6981 6981 NULL NULL ++6981 6981 NULL 6984454.21109769200000 + 6981 6981 NULL NULL + 6981 6981 NULL NULL + 6981 6981 NULL NULL +@@ -1387,13 +1388,14 @@ POSTHOOK: Input: default@decimal_mapjoin + 6981 6981 NULL NULL + 6981 6981 NULL NULL + 6981 6981 NULL NULL ++6981 6981 NULL NULL + 6981 6981 NULL 6984454.21109769200000 + 6981 6981 NULL NULL + 6981 6981 NULL NULL + 6981 6981 NULL NULL ++6981 6981 NULL -617.56077692307690 ++6981 6981 NULL -617.56077692307690 + 6981 6981 -515.6210729730 NULL +-6981 6981 -515.6210729730 -617.56077692307690 +-6981 6981 -515.6210729730 -617.56077692307690 + 6981 6981 -515.6210729730 NULL + 6981 6981 -515.6210729730 NULL + 6981 6981 -515.6210729730 NULL +@@ -1401,17 +1403,19 @@ POSTHOOK: Input: default@decimal_mapjoin + 6981 6981 -515.6210729730 NULL + 6981 6981 -515.6210729730 NULL + 6981 6981 -515.6210729730 NULL +-6981 6981 -515.6210729730 NULL + 6981 6981 -515.6210729730 -617.56077692307690 + 6981 6981 -515.6210729730 -617.56077692307690 + 6981 6981 -515.6210729730 NULL + 6981 6981 -515.6210729730 NULL + 6981 6981 -515.6210729730 NULL ++6981 6981 -515.6210729730 NULL + 6981 6981 -515.6210729730 6984454.21109769200000 + 6981 6981 -515.6210729730 NULL + 6981 6981 -515.6210729730 NULL + 6981 6981 -515.6210729730 NULL +-Warning: Map Join MAPJOIN[13][bigTable=?] 
in task 'Map 1' is a cross product ++6981 6981 -515.6210729730 -617.56077692307690 ++6981 6981 -515.6210729730 -617.56077692307690 ++Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product + PREHOOK: query: EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 + FROM decimal_mapjoin l + JOIN decimal_mapjoin r ON l.cint = r.cint +@@ -1431,7 +1435,7 @@ STAGE PLANS: + Tez + #### A masked pattern was here #### + Edges: +- Map 1 <- Map 2 (BROADCAST_EDGE) ++ Reducer 2 <- Map 1 (XPROD_EDGE), Map 3 (XPROD_EDGE) + #### A masked pattern was here #### + Vertices: + Map 1 +@@ -1446,29 +1450,12 @@ STAGE PLANS: + expressions: cdecimal1 (type: decimal(20,10)) + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 551 Basic stats: COMPLETE Column stats: NONE +- Map Join Operator +- condition map: +- Inner Join 0 to 1 +- keys: +- 0 +- 1 +- outputColumnNames: _col0, _col2 +- input vertices: +- 1 Map 2 +- Statistics: Num rows: 25 Data size: 5535 Basic stats: COMPLETE Column stats: NONE +- Select Operator +- expressions: 6981 (type: int), 6981 (type: int), _col0 (type: decimal(20,10)), _col2 (type: decimal(23,14)) +- outputColumnNames: _col0, _col1, _col2, _col3 +- Statistics: Num rows: 25 Data size: 5535 Basic stats: COMPLETE Column stats: NONE +- File Output Operator +- compressed: false +- Statistics: Num rows: 25 Data size: 5535 Basic stats: COMPLETE Column stats: NONE +- table: +- input format: org.apache.hadoop.mapred.SequenceFileInputFormat +- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat +- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe ++ Reduce Output Operator ++ sort order: ++ Statistics: Num rows: 5 Data size: 551 Basic stats: COMPLETE Column stats: NONE ++ value expressions: _col0 (type: decimal(20,10)) + Execution mode: vectorized +- Map 2 ++ Map 3 + Map Operator Tree: + TableScan + alias: r +@@ -1485,6 +1472,27 @@ STAGE PLANS: + Statistics: Num rows: 5 Data size: 551 Basic 
stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: decimal(23,14)) + Execution mode: vectorized ++ Reducer 2 ++ Reduce Operator Tree: ++ Merge Join Operator ++ condition map: ++ Inner Join 0 to 1 ++ keys: ++ 0 ++ 1 ++ outputColumnNames: _col0, _col2 ++ Statistics: Num rows: 25 Data size: 5535 Basic stats: COMPLETE Column stats: NONE ++ Select Operator ++ expressions: 6981 (type: int), 6981 (type: int), _col0 (type: decimal(20,10)), _col2 (type: decimal(23,14)) ++ outputColumnNames: _col0, _col1, _col2, _col3 ++ Statistics: Num rows: 25 Data size: 5535 Basic stats: COMPLETE Column stats: NONE ++ File Output Operator ++ compressed: false ++ Statistics: Num rows: 25 Data size: 5535 Basic stats: COMPLETE Column stats: NONE ++ table: ++ input format: org.apache.hadoop.mapred.SequenceFileInputFormat ++ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat ++ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator +@@ -1492,7 +1500,7 @@ STAGE PLANS: + Processor Tree: + ListSink + +-Warning: Map Join MAPJOIN[13][bigTable=?] 
in task 'Map 1' is a cross product ++Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product + PREHOOK: query: SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 + FROM decimal_mapjoin l + JOIN decimal_mapjoin r ON l.cint = r.cint +@@ -1508,8 +1516,6 @@ POSTHOOK: type: QUERY + POSTHOOK: Input: default@decimal_mapjoin + #### A masked pattern was here #### + 6981 6981 NULL NULL +-6981 6981 NULL -617.56077692307690 +-6981 6981 NULL -617.56077692307690 + 6981 6981 NULL NULL + 6981 6981 NULL NULL + 6981 6981 NULL NULL +@@ -1517,14 +1523,13 @@ POSTHOOK: Input: default@decimal_mapjoin + 6981 6981 NULL NULL + 6981 6981 NULL NULL + 6981 6981 NULL NULL +-6981 6981 NULL NULL + 6981 6981 NULL -617.56077692307690 + 6981 6981 NULL -617.56077692307690 + 6981 6981 NULL NULL + 6981 6981 NULL NULL + 6981 6981 NULL NULL +-6981 6981 NULL 6984454.21109769200000 + 6981 6981 NULL NULL ++6981 6981 NULL 6984454.21109769200000 + 6981 6981 NULL NULL + 6981 6981 NULL NULL + 6981 6981 NULL NULL +@@ -1533,8 +1538,8 @@ POSTHOOK: Input: default@decimal_mapjoin + 6981 6981 NULL NULL + 6981 6981 NULL NULL + 6981 6981 NULL NULL +-6981 6981 NULL 6984454.21109769200000 + 6981 6981 NULL NULL ++6981 6981 NULL 6984454.21109769200000 + 6981 6981 NULL NULL + 6981 6981 NULL NULL + 6981 6981 NULL NULL +@@ -1543,13 +1548,14 @@ POSTHOOK: Input: default@decimal_mapjoin + 6981 6981 NULL NULL + 6981 6981 NULL NULL + 6981 6981 NULL NULL ++6981 6981 NULL NULL + 6981 6981 NULL 6984454.21109769200000 + 6981 6981 NULL NULL + 6981 6981 NULL NULL + 6981 6981 NULL NULL ++6981 6981 NULL -617.56077692307690 ++6981 6981 NULL -617.56077692307690 + 6981 6981 5831542.2692483780 NULL +-6981 6981 5831542.2692483780 -617.56077692307690 +-6981 6981 5831542.2692483780 -617.56077692307690 + 6981 6981 5831542.2692483780 NULL + 6981 6981 5831542.2692483780 NULL + 6981 6981 5831542.2692483780 NULL +@@ -1557,9 +1563,9 @@ POSTHOOK: Input: default@decimal_mapjoin + 6981 6981 5831542.2692483780 
NULL + 6981 6981 5831542.2692483780 NULL + 6981 6981 5831542.2692483780 NULL ++6981 6981 5831542.2692483780 -617.56077692307690 ++6981 6981 5831542.2692483780 -617.56077692307690 + 6981 6981 NULL NULL +-6981 6981 NULL -617.56077692307690 +-6981 6981 NULL -617.56077692307690 + 6981 6981 NULL NULL + 6981 6981 NULL NULL + 6981 6981 NULL NULL +@@ -1567,14 +1573,13 @@ POSTHOOK: Input: default@decimal_mapjoin + 6981 6981 NULL NULL + 6981 6981 NULL NULL + 6981 6981 NULL NULL +-6981 6981 NULL NULL + 6981 6981 NULL -617.56077692307690 + 6981 6981 NULL -617.56077692307690 + 6981 6981 NULL NULL + 6981 6981 NULL NULL + 6981 6981 NULL NULL +-6981 6981 NULL 6984454.21109769200000 + 6981 6981 NULL NULL ++6981 6981 NULL 6984454.21109769200000 + 6981 6981 NULL NULL + 6981 6981 NULL NULL + 6981 6981 NULL NULL +@@ -1583,13 +1588,14 @@ POSTHOOK: Input: default@decimal_mapjoin + 6981 6981 NULL NULL + 6981 6981 NULL NULL + 6981 6981 NULL NULL ++6981 6981 NULL NULL + 6981 6981 NULL 6984454.21109769200000 + 6981 6981 NULL NULL + 6981 6981 NULL NULL + 6981 6981 NULL NULL ++6981 6981 NULL -617.56077692307690 ++6981 6981 NULL -617.56077692307690 + 6981 6981 -515.6210729730 NULL +-6981 6981 -515.6210729730 -617.56077692307690 +-6981 6981 -515.6210729730 -617.56077692307690 + 6981 6981 -515.6210729730 NULL + 6981 6981 -515.6210729730 NULL + 6981 6981 -515.6210729730 NULL +@@ -1597,16 +1603,18 @@ POSTHOOK: Input: default@decimal_mapjoin + 6981 6981 -515.6210729730 NULL + 6981 6981 -515.6210729730 NULL + 6981 6981 -515.6210729730 NULL +-6981 6981 -515.6210729730 NULL + 6981 6981 -515.6210729730 -617.56077692307690 + 6981 6981 -515.6210729730 -617.56077692307690 + 6981 6981 -515.6210729730 NULL + 6981 6981 -515.6210729730 NULL + 6981 6981 -515.6210729730 NULL ++6981 6981 -515.6210729730 NULL + 6981 6981 -515.6210729730 6984454.21109769200000 + 6981 6981 -515.6210729730 NULL + 6981 6981 -515.6210729730 NULL + 6981 6981 -515.6210729730 NULL ++6981 6981 -515.6210729730 -617.56077692307690 ++6981 
6981 -515.6210729730 -617.56077692307690 + PREHOOK: query: DROP TABLE decimal_mapjoin + PREHOOK: type: DROPTABLE + PREHOOK: Input: default@decimal_mapjoin diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 5190f04..3f85b16 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -138,6 +138,9 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\ count_dist_rewrite.q,\ create_merge_compressed.q,\ cross_join.q,\ + cross_prod_1.q,\ + cross_prod_3.q,\ + cross_prod_4.q,\ cross_product_check_1.q,\ cross_product_check_2.q,\ ctas.q,\ @@ -504,6 +507,9 @@ minillaplocal.query.files=\ correlationoptimizer4.q,\ correlationoptimizer6.q,\ disable_merge_for_bucketing.q,\ + cross_prod_1.q,\ + cross_prod_3.q,\ + cross_prod_4.q,\ dynamic_partition_pruning.q,\ dynamic_semijoin_reduction.q,\ dynamic_semijoin_reduction_2.q,\ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java index aae3480..5c338b8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java @@ -46,6 +46,9 @@ import org.apache.tez.mapreduce.common.MRInputSplitDistributor; import org.apache.tez.mapreduce.hadoop.InputSplitInfo; import org.apache.tez.mapreduce.protos.MRRuntimeProtos; +import org.apache.tez.runtime.library.api.Partitioner; +import org.apache.tez.runtime.library.cartesianproduct.CartesianProductConfig; +import org.apache.tez.runtime.library.cartesianproduct.CartesianProductEdgeManager; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; @@ -135,6 +138,7 @@ import org.apache.tez.runtime.library.conf.UnorderedKVEdgeConfig; import org.apache.tez.runtime.library.conf.UnorderedPartitionedKVEdgeConfig; import 
org.apache.tez.runtime.library.input.ConcatenatedMergedKeyValueInput; +import org.apache.tez.runtime.library.cartesianproduct.CartesianProductVertexManager; /** * DagUtils. DagUtils is a collection of helper methods to convert @@ -264,7 +268,7 @@ private JobConf initializeVertexConf(JobConf baseConf, Context context, MapWork */ @SuppressWarnings("rawtypes") public GroupInputEdge createEdge(VertexGroup group, JobConf vConf, Vertex w, - TezEdgeProperty edgeProp, VertexType vertexType) + TezEdgeProperty edgeProp, BaseWork work, TezWork tezWork) throws IOException { Class mergeInputClass; @@ -279,7 +283,8 @@ public GroupInputEdge createEdge(VertexGroup group, JobConf vConf, Vertex w, case CUSTOM_EDGE: { mergeInputClass = ConcatenatedMergedKeyValueInput.class; int numBuckets = edgeProp.getNumBuckets(); - CustomVertexConfiguration vertexConf = new CustomVertexConfiguration(numBuckets, vertexType); + CustomVertexConfiguration vertexConf + = new CustomVertexConfiguration(numBuckets, tezWork.getVertexType(work)); DataOutputBuffer dob = new DataOutputBuffer(); vertexConf.write(dob); VertexManagerPluginDescriptor desc = @@ -299,6 +304,10 @@ public GroupInputEdge createEdge(VertexGroup group, JobConf vConf, Vertex w, mergeInputClass = ConcatenatedMergedKeyValueInput.class; break; + case XPROD_EDGE: + mergeInputClass = ConcatenatedMergedKeyValueInput.class; + break; + case SIMPLE_EDGE: setupAutoReducerParallelism(edgeProp, w); // fall through @@ -308,7 +317,7 @@ public GroupInputEdge createEdge(VertexGroup group, JobConf vConf, Vertex w, break; } - return GroupInputEdge.create(group, w, createEdgeProperty(edgeProp, vConf), + return GroupInputEdge.create(group, w, createEdgeProperty(w, edgeProp, vConf, work, tezWork), InputDescriptor.create(mergeInputClass.getName())); } @@ -322,13 +331,14 @@ public GroupInputEdge createEdge(VertexGroup group, JobConf vConf, Vertex w, * @return */ public Edge createEdge(JobConf vConf, Vertex v, Vertex w, TezEdgeProperty edgeProp, - VertexType 
vertexType) + BaseWork work, TezWork tezWork) throws IOException { switch(edgeProp.getEdgeType()) { case CUSTOM_EDGE: { int numBuckets = edgeProp.getNumBuckets(); - CustomVertexConfiguration vertexConf = new CustomVertexConfiguration(numBuckets, vertexType); + CustomVertexConfiguration vertexConf = + new CustomVertexConfiguration(numBuckets, tezWork.getVertexType(work)); DataOutputBuffer dob = new DataOutputBuffer(); vertexConf.write(dob); VertexManagerPluginDescriptor desc = VertexManagerPluginDescriptor.create( @@ -339,6 +349,9 @@ public Edge createEdge(JobConf vConf, Vertex v, Vertex w, TezEdgeProperty edgePr w.setVertexManagerPlugin(desc); break; } + case XPROD_EDGE: + break; + case SIMPLE_EDGE: { setupAutoReducerParallelism(edgeProp, w); break; @@ -352,14 +365,15 @@ public Edge createEdge(JobConf vConf, Vertex v, Vertex w, TezEdgeProperty edgePr // nothing } - return Edge.create(v, w, createEdgeProperty(edgeProp, vConf)); + return Edge.create(v, w, createEdgeProperty(w, edgeProp, vConf, work, tezWork)); } /* * Helper function to create an edge property from an edge type. 
*/ - private EdgeProperty createEdgeProperty(TezEdgeProperty edgeProp, Configuration conf) - throws IOException { + private EdgeProperty createEdgeProperty(Vertex w, TezEdgeProperty edgeProp, + Configuration conf, BaseWork work, TezWork tezWork) + throws IOException { MRHelpers.translateMRConfToTez(conf); String keyClass = conf.get(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS); String valClass = conf.get(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS); @@ -412,7 +426,23 @@ private EdgeProperty createEdgeProperty(TezEdgeProperty edgeProp, Configuration .setValueSerializationClass(TezBytesWritableSerialization.class.getName(), null) .build(); return et4Conf.createDefaultOneToOneEdgeProperty(); + case XPROD_EDGE: + EdgeManagerPluginDescriptor edgeManagerDescriptor = + EdgeManagerPluginDescriptor.create(CartesianProductEdgeManager.class.getName()); + List crossProductSources = new ArrayList<>(); + for (BaseWork parentWork : tezWork.getParents(work)) { + if (EdgeType.XPROD_EDGE == tezWork.getEdgeType(parentWork, work)) { + crossProductSources.add(parentWork.getName()); + } + } + CartesianProductConfig cpConfig = new CartesianProductConfig(crossProductSources); + edgeManagerDescriptor.setUserPayload(cpConfig.toUserPayload(new TezConfiguration(conf))); + UnorderedPartitionedKVEdgeConfig cpEdgeConf = + UnorderedPartitionedKVEdgeConfig.newBuilder(keyClass, valClass, + ValueHashPartitioner.class.getName()).build(); + return cpEdgeConf.createDefaultCustomEdgeProperty(edgeManagerDescriptor); case SIMPLE_EDGE: + // fallthrough default: assert partitionerClassName != null; partitionerConf = createPartitionerConf(partitionerClassName, conf); @@ -427,6 +457,14 @@ private EdgeProperty createEdgeProperty(TezEdgeProperty edgeProp, Configuration } } + public static class ValueHashPartitioner implements Partitioner { + + @Override + public int getPartition(Object key, Object value, int numPartitions) { + return (value.hashCode() & 2147483647) % numPartitions; + } + } + /** * Utility 
method to create a stripped down configuration for the MR partitioner. * @@ -1240,6 +1278,21 @@ public Vertex createVertex(JobConf conf, BaseWork work, } else if (work instanceof MergeJoinWork) { v = createVertex(conf, (MergeJoinWork) work, appJarLr, additionalLr, fileSystem, scratchDir, ctx, vertexType); + // set VertexManagerPlugin if it's a cross product destination vertex + List crossProductSources = new ArrayList<>(); + for (BaseWork parentWork : tezWork.getParents(work)) { + if (tezWork.getEdgeType(parentWork, work) == EdgeType.XPROD_EDGE) { + crossProductSources.add(parentWork.getName()); + } + } + + if (!crossProductSources.isEmpty()) { + CartesianProductConfig cpConfig = new CartesianProductConfig(crossProductSources); + v.setVertexManagerPlugin( + VertexManagerPluginDescriptor.create(CartesianProductVertexManager.class.getName()) + .setUserPayload(cpConfig.toUserPayload(new TezConfiguration(conf)))); + // parallelism shouldn't be set for cartesian product vertex + } } else { // something is seriously wrong if this is happening throw new HiveException(ErrorMsg.GENERIC_ERROR.getErrorCodedMsg()); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java index 28d91cc..74ceb22 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java @@ -461,7 +461,7 @@ DAG build(JobConf conf, TezWork work, Path scratchDir, for (BaseWork v: children) { // finally we can create the grouped edge GroupInputEdge e = utils.createEdge(group, parentConf, - workToVertex.get(v), work.getEdgeProperty(w, v), work.getVertexType(v)); + workToVertex.get(v), work.getEdgeProperty(w, v), v, work); dag.addEdge(e); } @@ -490,8 +490,7 @@ DAG build(JobConf conf, TezWork work, Path scratchDir, Edge e = null; TezEdgeProperty edgeProp = work.getEdgeProperty(w, v); - - e = utils.createEdge(wxConf, wx, workToVertex.get(v), edgeProp, 
work.getVertexType(v)); + e = utils.createEdge(wxConf, wx, workToVertex.get(v), edgeProp, v, work); dag.addEdge(e); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java index e24760b..3af53ef 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java @@ -102,6 +102,14 @@ MemoryMonitorInfo memoryMonitorInfo = getMemoryMonitorInfo(maxSize, context.conf); joinOp.getConf().setMemoryMonitorInfo(memoryMonitorInfo); + // do not use map join in case of cross product + boolean cartesianProductEdgeEnabled = + HiveConf.getBoolVar(context.conf, HiveConf.ConfVars.TEZ_CARTESIAN_PRODUCT_EDGE_ENABLED); + if (cartesianProductEdgeEnabled && !hasOuterJoin(joinOp) && isCrossProduct(joinOp)) { + fallbackToMergeJoin(joinOp, context); + return null; + } + TezBucketJoinProcCtx tezBucketJoinProcCtx = new TezBucketJoinProcCtx(context.conf); boolean hiveConvertJoin = context.conf.getBoolVar(HiveConf.ConfVars.HIVECONVERTJOIN) & !context.parseContext.getDisableMapJoin(); @@ -572,6 +580,42 @@ private boolean checkColEquality(List> grandParentColNames, return false; } + private boolean hasOuterJoin(JoinOperator joinOp) throws SemanticException { + boolean hasOuter = false; + for (JoinCondDesc joinCondDesc : joinOp.getConf().getConds()) { + switch (joinCondDesc.getType()) { + case JoinDesc.INNER_JOIN: + case JoinDesc.LEFT_SEMI_JOIN: + case JoinDesc.UNIQUE_JOIN: + hasOuter = false; + break; + + case JoinDesc.FULL_OUTER_JOIN: + case JoinDesc.LEFT_OUTER_JOIN: + case JoinDesc.RIGHT_OUTER_JOIN: + hasOuter = true; + break; + + default: + throw new SemanticException("Unknown join type " + joinCondDesc.getType()); + } + } + return hasOuter; + } + + private boolean isCrossProduct(JoinOperator joinOp) { + ExprNodeDesc[][] joinExprs = joinOp.getConf().getJoinKeys(); + if (joinExprs != null) { + for 
(ExprNodeDesc[] expr : joinExprs) { + if (expr != null && expr.length != 0) { + return false; + } + } + } + + return true; + } + /** * Obtain big table position for join. * @@ -597,26 +641,7 @@ public int getMapJoinConversionPos(JoinOperator joinOp, OptimizeTezProcContext c * case this for now. */ if (joinOp.getConf().getConds().length > 1) { - boolean hasOuter = false; - for (JoinCondDesc joinCondDesc : joinOp.getConf().getConds()) { - switch (joinCondDesc.getType()) { - case JoinDesc.INNER_JOIN: - case JoinDesc.LEFT_SEMI_JOIN: - case JoinDesc.UNIQUE_JOIN: - hasOuter = false; - break; - - case JoinDesc.FULL_OUTER_JOIN: - case JoinDesc.LEFT_OUTER_JOIN: - case JoinDesc.RIGHT_OUTER_JOIN: - hasOuter = true; - break; - - default: - throw new SemanticException("Unknown join type " + joinCondDesc.getType()); - } - } - if (hasOuter) { + if (hasOuterJoin(joinOp)) { return -1; } } @@ -1058,14 +1083,19 @@ private void fallbackToReduceSideJoin(JoinOperator joinOp, OptimizeTezProcContex } } + // we are just converting to a common merge join operator. The shuffle + // join in map-reduce case. + fallbackToMergeJoin(joinOp, context); + } + + private void fallbackToMergeJoin(JoinOperator joinOp, OptimizeTezProcContext context) + throws SemanticException { int pos = getMapJoinConversionPos(joinOp, context, estimateNumBuckets(joinOp, false), true, Long.MAX_VALUE, false); if (pos < 0) { LOG.info("Could not get a valid join position. Defaulting to position 0"); pos = 0; } - // we are just converting to a common merge join operator. The shuffle - // join in map-reduce case. 
LOG.info("Fallback to common merge join operator"); convertJoinSMBJoin(joinOp, context, pos, 0, false); } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CrossProductCheck.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CrossProductCheck.java deleted file mode 100644 index f5abaf1..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CrossProductCheck.java +++ /dev/null @@ -1,368 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hive.ql.optimizer.physical; - -import java.io.Serializable; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.Iterator; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.Stack; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator; -import org.apache.hadoop.hive.ql.exec.ConditionalTask; -import org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator; -import org.apache.hadoop.hive.ql.exec.JoinOperator; -import org.apache.hadoop.hive.ql.exec.MapJoinOperator; -import org.apache.hadoop.hive.ql.exec.Operator; -import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; -import org.apache.hadoop.hive.ql.exec.TableScanOperator; -import org.apache.hadoop.hive.ql.exec.Task; -import org.apache.hadoop.hive.ql.exec.mr.MapRedTask; -import org.apache.hadoop.hive.ql.exec.tez.TezTask; -import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; -import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; -import org.apache.hadoop.hive.ql.lib.Dispatcher; -import org.apache.hadoop.hive.ql.lib.GraphWalker; -import org.apache.hadoop.hive.ql.lib.Node; -import org.apache.hadoop.hive.ql.lib.NodeProcessor; -import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; -import org.apache.hadoop.hive.ql.lib.Rule; -import org.apache.hadoop.hive.ql.lib.RuleRegExp; -import org.apache.hadoop.hive.ql.lib.TaskGraphWalker; -import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.plan.BaseWork; -import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; -import org.apache.hadoop.hive.ql.plan.MapJoinDesc; -import org.apache.hadoop.hive.ql.plan.MapWork; -import org.apache.hadoop.hive.ql.plan.MapredWork; -import org.apache.hadoop.hive.ql.plan.MergeJoinWork; -import org.apache.hadoop.hive.ql.plan.OperatorDesc; -import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc; -import 
org.apache.hadoop.hive.ql.plan.ReduceWork; -import org.apache.hadoop.hive.ql.plan.TableScanDesc; -import org.apache.hadoop.hive.ql.plan.TezWork; -import org.apache.hadoop.hive.ql.session.SessionState; - -/* - * Check each MapJoin and ShuffleJoin Operator to see they are performing a cross product. - * If yes, output a warning to the Session's console. - * The Checks made are the following: - * 1. MR, Shuffle Join: - * Check the parent ReduceSinkOp of the JoinOp. If its keys list is size = 0, then - * this is a cross product. - * The parent ReduceSinkOp is in the MapWork for the same Stage. - * 2. MR, MapJoin: - * If the keys expr list on the mapJoin Desc is an empty list for any input, - * this implies a cross product. - * 3. Tez, Shuffle Join: - * Check the parent ReduceSinkOp of the JoinOp. If its keys list is size = 0, then - * this is a cross product. - * The parent ReduceSinkOp checked is based on the ReduceWork.tagToInput map on the - * reduceWork that contains the JoinOp. - * 4. Tez, Map Join: - * If the keys expr list on the mapJoin Desc is an empty list for any input, - * this implies a cross product. - */ -public class CrossProductCheck implements PhysicalPlanResolver, Dispatcher { - - protected static transient final Logger LOG = LoggerFactory - .getLogger(CrossProductCheck.class); - - @Override - public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException { - TaskGraphWalker ogw = new TaskGraphWalker(this); - - ArrayList topNodes = new ArrayList(); - topNodes.addAll(pctx.getRootTasks()); - - ogw.startWalking(topNodes, null); - return pctx; - } - - @Override - public Object dispatch(Node nd, Stack stack, Object... 
nodeOutputs) - throws SemanticException { - @SuppressWarnings("unchecked") - Task currTask = (Task) nd; - if (currTask instanceof MapRedTask) { - MapRedTask mrTsk = (MapRedTask)currTask; - MapredWork mrWrk = mrTsk.getWork(); - checkMapJoins(mrTsk); - checkMRReducer(currTask.toString(), mrWrk); - } else if (currTask instanceof ConditionalTask ) { - List> taskListInConditionalTask = - ((ConditionalTask) currTask).getListTasks(); - for(Task tsk: taskListInConditionalTask){ - dispatch(tsk, stack, nodeOutputs); - } - - } else if (currTask instanceof TezTask) { - TezTask tzTask = (TezTask) currTask; - TezWork tzWrk = tzTask.getWork(); - checkMapJoins(tzWrk); - checkTezReducer(tzWrk); - } - return null; - } - - private void warn(String msg) { - SessionState.getConsole().printInfo("Warning: " + msg, false); - } - - private void checkMapJoins(MapRedTask mrTsk) throws SemanticException { - MapredWork mrWrk = mrTsk.getWork(); - MapWork mapWork = mrWrk.getMapWork(); - List warnings = new MapJoinCheck(mrTsk.toString()).analyze(mapWork); - if (!warnings.isEmpty()) { - for (String w : warnings) { - warn(w); - } - } - ReduceWork redWork = mrWrk.getReduceWork(); - if (redWork != null) { - warnings = new MapJoinCheck(mrTsk.toString()).analyze(redWork); - if (!warnings.isEmpty()) { - for (String w : warnings) { - warn(w); - } - } - } - } - - private void checkMapJoins(TezWork tzWrk) throws SemanticException { - for(BaseWork wrk : tzWrk.getAllWork() ) { - - if ( wrk instanceof MergeJoinWork ) { - wrk = ((MergeJoinWork)wrk).getMainWork(); - } - - List warnings = new MapJoinCheck(wrk.getName()).analyze(wrk); - if ( !warnings.isEmpty() ) { - for(String w : warnings) { - warn(w); - } - } - } - } - - private void checkTezReducer(TezWork tzWrk) throws SemanticException { - for(BaseWork wrk : tzWrk.getAllWork() ) { - - if ( wrk instanceof MergeJoinWork ) { - wrk = ((MergeJoinWork)wrk).getMainWork(); - } - - if ( !(wrk instanceof ReduceWork ) ) { - continue; - } - ReduceWork rWork = 
(ReduceWork) wrk; - Operator reducer = ((ReduceWork)wrk).getReducer(); - if ( reducer instanceof JoinOperator || reducer instanceof CommonMergeJoinOperator ) { - Map rsInfo = - new HashMap(); - for(Map.Entry e : rWork.getTagToInput().entrySet()) { - rsInfo.putAll(getReducerInfo(tzWrk, rWork.getName(), e.getValue())); - } - checkForCrossProduct(rWork.getName(), reducer, rsInfo); - } - } - } - - private void checkMRReducer(String taskName, MapredWork mrWrk) throws SemanticException { - ReduceWork rWrk = mrWrk.getReduceWork(); - if ( rWrk == null) { - return; - } - Operator reducer = rWrk.getReducer(); - if ( reducer instanceof JoinOperator|| reducer instanceof CommonMergeJoinOperator ) { - BaseWork prntWork = mrWrk.getMapWork(); - checkForCrossProduct(taskName, reducer, - new ExtractReduceSinkInfo(null).analyze(prntWork)); - } - } - - private void checkForCrossProduct(String taskName, - Operator reducer, - Map rsInfo) { - if ( rsInfo.isEmpty() ) { - return; - } - Iterator it = rsInfo.values().iterator(); - ExtractReduceSinkInfo.Info info = it.next(); - if (info.keyCols.size() == 0) { - List iAliases = new ArrayList(); - iAliases.addAll(info.inputAliases); - while (it.hasNext()) { - info = it.next(); - iAliases.addAll(info.inputAliases); - } - String warning = String.format( - "Shuffle Join %s[tables = %s] in Stage '%s' is a cross product", - reducer.toString(), - iAliases, - taskName); - warn(warning); - } - } - - private Map getReducerInfo(TezWork tzWrk, String vertex, String prntVertex) - throws SemanticException { - BaseWork prntWork = tzWrk.getWorkMap().get(prntVertex); - return new ExtractReduceSinkInfo(vertex).analyze(prntWork); - } - - /* - * Given a Work descriptor and the TaskName for the work - * this is responsible to check each MapJoinOp for cross products. - * The analyze call returns the warnings list. - *

- * For MR the taskname is the StageName, for Tez it is the vertex name. - */ - public static class MapJoinCheck implements NodeProcessor, NodeProcessorCtx { - - final List warnings; - final String taskName; - - MapJoinCheck(String taskName) { - this.taskName = taskName; - warnings = new ArrayList(); - } - - List analyze(BaseWork work) throws SemanticException { - Map opRules = new LinkedHashMap(); - opRules.put(new RuleRegExp("R1", MapJoinOperator.getOperatorName() - + "%"), this); - Dispatcher disp = new DefaultRuleDispatcher(new NoopProcessor(), opRules, this); - GraphWalker ogw = new DefaultGraphWalker(disp); - ArrayList topNodes = new ArrayList(); - topNodes.addAll(work.getAllRootOperators()); - ogw.startWalking(topNodes, null); - return warnings; - } - - @Override - public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, - Object... nodeOutputs) throws SemanticException { - @SuppressWarnings("unchecked") - AbstractMapJoinOperator mjOp = (AbstractMapJoinOperator) nd; - MapJoinDesc mjDesc = mjOp.getConf(); - - String bigTablAlias = mjDesc.getBigTableAlias(); - if ( bigTablAlias == null ) { - Operator parent = null; - for(Operator op : mjOp.getParentOperators() ) { - if ( op instanceof TableScanOperator ) { - parent = op; - } - } - if ( parent != null) { - TableScanDesc tDesc = ((TableScanOperator)parent).getConf(); - bigTablAlias = tDesc.getAlias(); - } - } - bigTablAlias = bigTablAlias == null ? "?" : bigTablAlias; - - List joinExprs = mjDesc.getKeys().values().iterator().next(); - - if ( joinExprs.size() == 0 ) { - warnings.add( - String.format("Map Join %s[bigTable=%s] in task '%s' is a cross product", - mjOp.toString(), bigTablAlias, taskName)); - } - - return null; - } - } - - /* - * for a given Work Descriptor, it extracts information about the ReduceSinkOps - * in the Work. For Tez, you can restrict it to ReduceSinks for a particular output - * vertex. 
- */ - public static class ExtractReduceSinkInfo implements NodeProcessor, NodeProcessorCtx { - - static class Info { - List keyCols; - List inputAliases; - - Info(List keyCols, List inputAliases) { - this.keyCols = keyCols; - this.inputAliases = inputAliases == null ? new ArrayList() : inputAliases; - } - - Info(List keyCols, String[] inputAliases) { - this.keyCols = keyCols; - this.inputAliases = inputAliases == null ? new ArrayList() : Arrays.asList(inputAliases); - } - } - - final String outputTaskName; - final Map reduceSinkInfo; - - ExtractReduceSinkInfo(String parentTaskName) { - this.outputTaskName = parentTaskName; - reduceSinkInfo = new HashMap(); - } - - Map analyze(BaseWork work) throws SemanticException { - Map opRules = new LinkedHashMap(); - opRules.put(new RuleRegExp("R1", ReduceSinkOperator.getOperatorName() - + "%"), this); - Dispatcher disp = new DefaultRuleDispatcher(new NoopProcessor(), opRules, this); - GraphWalker ogw = new DefaultGraphWalker(disp); - ArrayList topNodes = new ArrayList(); - topNodes.addAll(work.getAllRootOperators()); - ogw.startWalking(topNodes, null); - return reduceSinkInfo; - } - - @Override - public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, - Object... nodeOutputs) throws SemanticException { - ReduceSinkOperator rsOp = (ReduceSinkOperator) nd; - ReduceSinkDesc rsDesc = rsOp.getConf(); - - if ( outputTaskName != null ) { - String rOutputName = rsDesc.getOutputName(); - if ( rOutputName == null || !outputTaskName.equals(rOutputName)) { - return null; - } - } - - reduceSinkInfo.put(rsDesc.getTag(), - new Info(rsDesc.getKeyCols(), rsOp.getInputAliases())); - - return null; - } - } - - static class NoopProcessor implements NodeProcessor { - @Override - public final Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, - Object... 
nodeOutputs) throws SemanticException { - return nd; - } - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CrossProductHandler.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CrossProductHandler.java new file mode 100644 index 0000000..93367d9 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CrossProductHandler.java @@ -0,0 +1,382 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.optimizer.physical; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Stack; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.plan.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator; +import org.apache.hadoop.hive.ql.exec.ConditionalTask; +import org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator; +import org.apache.hadoop.hive.ql.exec.JoinOperator; +import org.apache.hadoop.hive.ql.exec.MapJoinOperator; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; +import org.apache.hadoop.hive.ql.exec.TableScanOperator; +import org.apache.hadoop.hive.ql.exec.Task; +import org.apache.hadoop.hive.ql.exec.mr.MapRedTask; +import org.apache.hadoop.hive.ql.exec.tez.TezTask; +import org.apache.hadoop.hive.ql.plan.TezEdgeProperty.EdgeType; +import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; +import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; +import org.apache.hadoop.hive.ql.lib.Dispatcher; +import org.apache.hadoop.hive.ql.lib.GraphWalker; +import org.apache.hadoop.hive.ql.lib.Node; +import org.apache.hadoop.hive.ql.lib.NodeProcessor; +import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import org.apache.hadoop.hive.ql.lib.Rule; +import org.apache.hadoop.hive.ql.lib.RuleRegExp; +import org.apache.hadoop.hive.ql.lib.TaskGraphWalker; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.session.SessionState; + +/* + * Check each MapJoin and ShuffleJoin Operator to see if they are performing a cross product. + * If yes, output a warning to the Session's console. + * The Checks made are the following: + * 1. 
MR, Shuffle Join: + * Check the parent ReduceSinkOp of the JoinOp. If its keys list is size = 0, then + * this is a cross product. + * The parent ReduceSinkOp is in the MapWork for the same Stage. + * 2. MR, MapJoin: + * If the keys expr list on the mapJoin Desc is an empty list for any input, + * this implies a cross product. + * 3. Tez, Shuffle Join: + * Check the parent ReduceSinkOp of the JoinOp. If its keys list is size = 0, then + * this is a cross product. + * The parent ReduceSinkOp checked is based on the ReduceWork.tagToInput map on the + * reduceWork that contains the JoinOp. + * 4. Tez, Map Join: + * If the keys expr list on the mapJoin Desc is an empty list for any input, + * this implies a cross product. + */ +public class CrossProductHandler implements PhysicalPlanResolver, Dispatcher { + + protected static transient final Logger LOG = LoggerFactory + .getLogger(CrossProductHandler.class); + private Boolean cartesianProductEdgeEnabled = null; + + @Override + public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException { + cartesianProductEdgeEnabled = + HiveConf.getBoolVar(pctx.getConf(), HiveConf.ConfVars.TEZ_CARTESIAN_PRODUCT_EDGE_ENABLED); + TaskGraphWalker ogw = new TaskGraphWalker(this); + + ArrayList topNodes = new ArrayList(); + topNodes.addAll(pctx.getRootTasks()); + + ogw.startWalking(topNodes, null); + return pctx; + } + + @Override + public Object dispatch(Node nd, Stack stack, Object... 
nodeOutputs) + throws SemanticException { + @SuppressWarnings("unchecked") + Task currTask = (Task) nd; + if (currTask instanceof MapRedTask) { + MapRedTask mrTsk = (MapRedTask)currTask; + MapredWork mrWrk = mrTsk.getWork(); + checkMapJoins(mrTsk); + checkMRReducer(currTask.toString(), mrWrk); + } else if (currTask instanceof ConditionalTask ) { + List> taskListInConditionalTask = + ((ConditionalTask) currTask).getListTasks(); + for(Task tsk: taskListInConditionalTask){ + dispatch(tsk, stack, nodeOutputs); + } + + } else if (currTask instanceof TezTask) { + TezTask tezTask = (TezTask) currTask; + TezWork tezWork = tezTask.getWork(); + checkMapJoins(tezWork); + checkTezReducer(tezWork); + } + return null; + } + + private void warn(String msg) { + SessionState.getConsole().printInfo("Warning: " + msg, false); + } + + private void checkMapJoins(MapRedTask mrTsk) throws SemanticException { + MapredWork mrWrk = mrTsk.getWork(); + MapWork mapWork = mrWrk.getMapWork(); + List warnings = new MapJoinCheck(mrTsk.toString()).analyze(mapWork); + if (!warnings.isEmpty()) { + for (String w : warnings) { + warn(w); + } + } + ReduceWork redWork = mrWrk.getReduceWork(); + if (redWork != null) { + warnings = new MapJoinCheck(mrTsk.toString()).analyze(redWork); + if (!warnings.isEmpty()) { + for (String w : warnings) { + warn(w); + } + } + } + } + + private void checkMapJoins(TezWork tezWork) throws SemanticException { + for(BaseWork wrk : tezWork.getAllWork() ) { + + if ( wrk instanceof MergeJoinWork ) { + wrk = ((MergeJoinWork)wrk).getMainWork(); + } + + List warnings = new MapJoinCheck(wrk.getName()).analyze(wrk); + if ( !warnings.isEmpty() ) { + for(String w : warnings) { + warn(w); + } + } + } + } + + private void checkTezReducer(TezWork tezWork) throws SemanticException { + for(BaseWork wrk : tezWork.getAllWork() ) { + BaseWork origWrk = null; + + if ( wrk instanceof MergeJoinWork ) { + origWrk = wrk; + wrk = ((MergeJoinWork)wrk).getMainWork(); + } + + if ( !(wrk instanceof 
ReduceWork ) ) { + continue; + } + ReduceWork rWork = (ReduceWork) wrk; + Operator reducer = ((ReduceWork)wrk).getReducer(); + if ( reducer instanceof JoinOperator || reducer instanceof CommonMergeJoinOperator ) { + boolean noOuterJoin = ((JoinDesc)reducer.getConf()).isNoOuterJoin(); + Map rsInfo = + new HashMap(); + for(Map.Entry e : rWork.getTagToInput().entrySet()) { + rsInfo.putAll(getReducerInfo(tezWork, rWork.getName(), e.getValue())); + } + if (checkForCrossProduct(rWork.getName(), reducer, rsInfo) + && cartesianProductEdgeEnabled && noOuterJoin) { + List parents = tezWork.getParents(null == origWrk ? wrk : origWrk); + for (BaseWork p: parents) { + TezEdgeProperty prop = tezWork.getEdgeProperty(p, null == origWrk ? wrk : origWrk); + LOG.info("Edge Type: "+prop.getEdgeType()); + if (prop.getEdgeType().equals(EdgeType.CUSTOM_SIMPLE_EDGE) + || prop.getEdgeType().equals(EdgeType.CUSTOM_EDGE)) { + prop.setEdgeType(EdgeType.XPROD_EDGE); + rWork.setNumReduceTasks(-1); + rWork.setMaxReduceTasks(-1); + rWork.setMinReduceTasks(-1); + } + } + } + } + } + } + + private void checkMRReducer(String taskName, MapredWork mrWrk) throws SemanticException { + ReduceWork rWrk = mrWrk.getReduceWork(); + if ( rWrk == null) { + return; + } + Operator reducer = rWrk.getReducer(); + if ( reducer instanceof JoinOperator|| reducer instanceof CommonMergeJoinOperator ) { + BaseWork parentWork = mrWrk.getMapWork(); + checkForCrossProduct(taskName, reducer, + new ExtractReduceSinkInfo(null).analyze(parentWork)); + } + } + + private boolean checkForCrossProduct(String taskName, + Operator reducer, + Map rsInfo) { + if ( rsInfo.isEmpty() ) { + return false; + } + Iterator it = rsInfo.values().iterator(); + ExtractReduceSinkInfo.Info info = it.next(); + if (info.keyCols.size() == 0) { + List iAliases = new ArrayList(); + iAliases.addAll(info.inputAliases); + while (it.hasNext()) { + info = it.next(); + iAliases.addAll(info.inputAliases); + } + String warning = String.format( + "Shuffle Join 
%s[tables = %s] in Stage '%s' is a cross product", + reducer.toString(), + iAliases, + taskName); + warn(warning); + return true; + } + return false; + } + + private Map getReducerInfo(TezWork tezWork, String vertex, String prntVertex) + throws SemanticException { + BaseWork parentWork = tezWork.getWorkMap().get(prntVertex); + return new ExtractReduceSinkInfo(vertex).analyze(parentWork); + } + + /* + * Given a Work descriptor and the TaskName for the work + * this is responsible to check each MapJoinOp for cross products. + * The analyze call returns the warnings list. + *

+ * For MR the taskname is the StageName, for Tez it is the vertex name. + */ + public static class MapJoinCheck implements NodeProcessor, NodeProcessorCtx { + + final List warnings; + final String taskName; + + MapJoinCheck(String taskName) { + this.taskName = taskName; + warnings = new ArrayList(); + } + + List analyze(BaseWork work) throws SemanticException { + Map opRules = new LinkedHashMap(); + opRules.put(new RuleRegExp("R1", MapJoinOperator.getOperatorName() + + "%"), this); + Dispatcher disp = new DefaultRuleDispatcher(new NoopProcessor(), opRules, this); + GraphWalker ogw = new DefaultGraphWalker(disp); + ArrayList topNodes = new ArrayList(); + topNodes.addAll(work.getAllRootOperators()); + ogw.startWalking(topNodes, null); + return warnings; + } + + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + @SuppressWarnings("unchecked") + AbstractMapJoinOperator mjOp = (AbstractMapJoinOperator) nd; + MapJoinDesc mjDesc = mjOp.getConf(); + + String bigTablAlias = mjDesc.getBigTableAlias(); + if ( bigTablAlias == null ) { + Operator parent = null; + for(Operator op : mjOp.getParentOperators() ) { + if ( op instanceof TableScanOperator ) { + parent = op; + } + } + if ( parent != null) { + TableScanDesc tDesc = ((TableScanOperator)parent).getConf(); + bigTablAlias = tDesc.getAlias(); + } + } + bigTablAlias = bigTablAlias == null ? "?" : bigTablAlias; + + List joinExprs = mjDesc.getKeys().values().iterator().next(); + + if ( joinExprs.size() == 0 ) { + warnings.add( + String.format("Map Join %s[bigTable=%s] in task '%s' is a cross product", + mjOp.toString(), bigTablAlias, taskName)); + } + + return null; + } + } + + /* + * for a given Work Descriptor, it extracts information about the ReduceSinkOps + * in the Work. For Tez, you can restrict it to ReduceSinks for a particular output + * vertex. 
+ */ + public static class ExtractReduceSinkInfo implements NodeProcessor, NodeProcessorCtx { + + static class Info { + List keyCols; + List inputAliases; + + Info(List keyCols, List inputAliases) { + this.keyCols = keyCols; + this.inputAliases = inputAliases == null ? new ArrayList() : inputAliases; + } + + Info(List keyCols, String[] inputAliases) { + this.keyCols = keyCols; + this.inputAliases = inputAliases == null ? new ArrayList() : Arrays.asList(inputAliases); + } + } + + final String outputTaskName; + final Map reduceSinkInfo; + + ExtractReduceSinkInfo(String parentTaskName) { + this.outputTaskName = parentTaskName; + reduceSinkInfo = new HashMap(); + } + + Map analyze(BaseWork work) throws SemanticException { + Map opRules = new LinkedHashMap(); + opRules.put(new RuleRegExp("R1", ReduceSinkOperator.getOperatorName() + + "%"), this); + Dispatcher disp = new DefaultRuleDispatcher(new NoopProcessor(), opRules, this); + GraphWalker ogw = new DefaultGraphWalker(disp); + ArrayList topNodes = new ArrayList(); + topNodes.addAll(work.getAllRootOperators()); + ogw.startWalking(topNodes, null); + return reduceSinkInfo; + } + + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + ReduceSinkOperator rsOp = (ReduceSinkOperator) nd; + ReduceSinkDesc rsDesc = rsOp.getConf(); + + if ( outputTaskName != null ) { + String rOutputName = rsDesc.getOutputName(); + if ( rOutputName == null || !outputTaskName.equals(rOutputName)) { + return null; + } + } + + reduceSinkInfo.put(rsDesc.getTag(), + new Info(rsDesc.getKeyCols(), rsOp.getInputAliases())); + + return null; + } + } + + static class NoopProcessor implements NodeProcessor { + @Override + public final Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... 
nodeOutputs) throws SemanticException { + return nd; + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java index 9377563..c040406 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java @@ -82,7 +82,7 @@ private void initialize(HiveConf hiveConf) { } if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_CHECK_CROSS_PRODUCT)) { - resolvers.add(new CrossProductCheck()); + resolvers.add(new CrossProductHandler()); } // Vectorization should be the last optimization, because it doesn't modify the plan diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SparkCrossProductCheck.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SparkCrossProductCheck.java index f904451..cdb3bc8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SparkCrossProductCheck.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SparkCrossProductCheck.java @@ -92,10 +92,10 @@ private void checkShuffleJoin(SparkWork sparkWork) throws SemanticException { for (ReduceWork reduceWork : sparkWork.getAllReduceWork()) { Operator reducer = reduceWork.getReducer(); if (reducer instanceof JoinOperator || reducer instanceof CommonMergeJoinOperator) { - Map rsInfo = - new HashMap(); + Map rsInfo = + new HashMap(); for (BaseWork parent : sparkWork.getParents(reduceWork)) { - rsInfo.putAll(new CrossProductCheck.ExtractReduceSinkInfo(null).analyze(parent)); + rsInfo.putAll(new CrossProductHandler.ExtractReduceSinkInfo(null).analyze(parent)); } checkForCrossProduct(reduceWork.getName(), reducer, rsInfo); } @@ -106,7 +106,7 @@ private void checkMapJoin(SparkTask sparkTask) throws SemanticException { SparkWork sparkWork = sparkTask.getWork(); for (BaseWork baseWork : sparkWork.getAllWork()) { List warnings = - new 
CrossProductCheck.MapJoinCheck(sparkTask.toString()).analyze(baseWork); + new CrossProductHandler.MapJoinCheck(sparkTask.toString()).analyze(baseWork); for (String w : warnings) { warn(w); } @@ -115,12 +115,12 @@ private void checkMapJoin(SparkTask sparkTask) throws SemanticException { private void checkForCrossProduct(String workName, Operator reducer, - Map rsInfo) { + Map rsInfo) { if (rsInfo.isEmpty()) { return; } - Iterator it = rsInfo.values().iterator(); - CrossProductCheck.ExtractReduceSinkInfo.Info info = it.next(); + Iterator it = rsInfo.values().iterator(); + CrossProductHandler.ExtractReduceSinkInfo.Info info = it.next(); if (info.keyCols.size() == 0) { List iAliases = new ArrayList(); iAliases.addAll(info.inputAliases); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java index 15836ec..da30c3b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java @@ -80,7 +80,7 @@ import org.apache.hadoop.hive.ql.optimizer.correlation.ReduceSinkJoinDeDuplication; import org.apache.hadoop.hive.ql.optimizer.metainfo.annotation.AnnotateWithOpTraits; import org.apache.hadoop.hive.ql.optimizer.physical.AnnotateRunTimeStatsOptimizer; -import org.apache.hadoop.hive.ql.optimizer.physical.CrossProductCheck; +import org.apache.hadoop.hive.ql.optimizer.physical.CrossProductHandler; import org.apache.hadoop.hive.ql.optimizer.physical.LlapClusterStateForCompile; import org.apache.hadoop.hive.ql.optimizer.physical.LlapDecider; import org.apache.hadoop.hive.ql.optimizer.physical.LlapPreVectorizationPass; @@ -658,7 +658,7 @@ protected void optimizeTaskPlan(List> rootTasks, Pa } if (conf.getBoolVar(HiveConf.ConfVars.HIVE_CHECK_CROSS_PRODUCT)) { - physicalCtx = new CrossProductCheck().resolve(physicalCtx); + physicalCtx = new CrossProductHandler().resolve(physicalCtx); } else { LOG.debug("Skipping cross product analysis"); } 
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/TezEdgeProperty.java ql/src/java/org/apache/hadoop/hive/ql/plan/TezEdgeProperty.java index bbed9be..d43b81a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/TezEdgeProperty.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/TezEdgeProperty.java @@ -28,7 +28,8 @@ CONTAINS,//used for union (all?) CUSTOM_EDGE,//CO_PARTITION_EDGE CUSTOM_SIMPLE_EDGE,//PARTITION_EDGE - ONE_TO_ONE_EDGE + ONE_TO_ONE_EDGE, + XPROD_EDGE } private HiveConf hiveConf; @@ -107,4 +108,5 @@ public void setSlowStart(boolean slowStart) { public void setEdgeType(EdgeType type) { this.edgeType = type; } + } diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezTask.java ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezTask.java index 2dc334d..47aa936 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezTask.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezTask.java @@ -109,8 +109,8 @@ public Vertex answer(InvocationOnMock invocation) throws Throwable { }); when(utils.createEdge(any(JobConf.class), any(Vertex.class), any(Vertex.class), - any(TezEdgeProperty.class), any(VertexType.class))).thenAnswer(new Answer() { - + any(TezEdgeProperty.class), any(BaseWork.class), any(TezWork.class))) + .thenAnswer(new Answer() { @Override public Edge answer(InvocationOnMock invocation) throws Throwable { Object[] args = invocation.getArguments(); diff --git ql/src/test/queries/clientpositive/cross_prod_1.q ql/src/test/queries/clientpositive/cross_prod_1.q new file mode 100644 index 0000000..b5a84ea --- /dev/null +++ ql/src/test/queries/clientpositive/cross_prod_1.q @@ -0,0 +1,34 @@ +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set hive.tez.cartesian-product.enabled=true; + +create table X as +select distinct * from src order by key limit 10; + +explain select * from X as A, X as B order by A.key, B.key; +select * from X as A, X as B order by A.key, B.key; + +explain select * from X as A join X as B 
on A.key 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 @@ -300,7 +300,7 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in2.txt' into table sm POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_input2 -Warning: Map Join MAPJOIN[18][bigTable=?] in task 'Map 1' is a cross product +Warning: Shuffle Join MERGEJOIN[18][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 diff --git ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out index 5984e8f..04da1f2 100644 --- ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out +++ ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out @@ -14,7 +14,7 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in1.txt' INTO TABLE my POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@myinput1 -Warning: Map Join MAPJOIN[14][bigTable=?] 
in task 'Map 1' is a cross product +Warning: Shuffle Join MERGEJOIN[14][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 diff --git ql/src/test/results/clientpositive/llap/auto_sortmerge_join_12.q.out ql/src/test/results/clientpositive/llap/auto_sortmerge_join_12.q.out index 6ef1f34..3acbb20 100644 --- ql/src/test/results/clientpositive/llap/auto_sortmerge_join_12.q.out +++ ql/src/test/results/clientpositive/llap/auto_sortmerge_join_12.q.out @@ -134,7 +134,7 @@ POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket3out POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_medium@ds=2008-04-08 -Warning: Map Join MAPJOIN[34][bigTable=?] in task 'Map 3' is a cross product +Warning: Shuffle Join MERGEJOIN[34][tables = [$hdt$_1, $hdt$_2, $hdt$_0, $hdt$_3]] in Stage 'Reducer 4' is a cross product PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key @@ -148,8 +148,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 3 <- Map 1 (BROADCAST_EDGE), Map 2 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE) - Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) + Map 3 <- Map 1 (BROADCAST_EDGE), Map 2 (BROADCAST_EDGE) + Reducer 4 <- Map 3 (XPROD_EDGE), Map 6 (XPROD_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -336,29 +337,12 @@ STAGE PLANS: 1 Map 2 Position of Big Table: 2 Statistics: Num rows: 244 Data size: 43381 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - 
condition map: - Inner Join 0 to 1 - Estimated key counts: Map 5 => 1 - keys: - 0 - 1 - input vertices: - 1 Map 5 - Position of Big Table: 0 - Statistics: Num rows: 244 Data size: 45577 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 244 Data size: 43381 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: false Execution mode: llap LLAP IO: no inputs Path -> Alias: @@ -465,7 +449,7 @@ STAGE PLANS: Truncated Path -> Alias: /bucket_big/ds=2008-04-08 [c] /bucket_big/ds=2008-04-09 [c] - Map 5 + Map 6 Map Operator Tree: TableScan alias: d @@ -539,6 +523,30 @@ STAGE PLANS: Execution mode: llap Needs Tagging: false Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + Position of Big Table: 0 + Statistics: Num rows: 244 Data size: 45577 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 5 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial @@ -573,7 +581,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[34][bigTable=?] 
in task 'Map 3' is a cross product +Warning: Shuffle Join MERGEJOIN[34][tables = [$hdt$_1, $hdt$_2, $hdt$_0, $hdt$_3]] in Stage 'Reducer 4' is a cross product PREHOOK: query: select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key PREHOOK: type: QUERY PREHOOK: Input: default@bucket_big diff --git ql/src/test/results/clientpositive/llap/cross_join.q.out ql/src/test/results/clientpositive/llap/cross_join.q.out index 9d664af..6bde893 100644 --- ql/src/test/results/clientpositive/llap/cross_join.q.out +++ ql/src/test/results/clientpositive/llap/cross_join.q.out @@ -12,7 +12,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (XPROD_EDGE), Map 3 (XPROD_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -81,7 +81,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (XPROD_EDGE), Map 3 (XPROD_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -215,7 +215,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[9][bigTable=?] 
in task 'Map 1' is a cross product +Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select src.key from src join src src2 PREHOOK: type: QUERY POSTHOOK: query: explain select src.key from src join src src2 @@ -229,7 +229,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (XPROD_EDGE), Map 3 (XPROD_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -241,26 +241,13 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0 - input vertices: - 1 Map 2 - Statistics: Num rows: 250000 Data size: 21750000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 250000 Data size: 21750000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Map 2 + Map 3 Map Operator Tree: TableScan alias: src2 @@ -272,6 +259,24 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0 + Statistics: Num rows: 250000 Data size: 21750000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + 
Statistics: Num rows: 250000 Data size: 21750000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -279,7 +284,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 1' is a cross product +Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select src.key from src cross join src src2 PREHOOK: type: QUERY POSTHOOK: query: explain select src.key from src cross join src src2 @@ -293,7 +298,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (XPROD_EDGE), Map 3 (XPROD_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -305,26 +310,13 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0 - input vertices: - 1 Map 2 - Statistics: Num rows: 250000 Data size: 21750000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 250000 Data size: 21750000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Map 2 + Map 3 Map Operator Tree: TableScan alias: src2 @@ -336,6 +328,24 @@ STAGE 
PLANS: Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0 + Statistics: Num rows: 250000 Data size: 21750000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250000 Data size: 21750000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/cross_prod_1.q.out ql/src/test/results/clientpositive/llap/cross_prod_1.q.out new file mode 100644 index 0000000..fd03fe5 --- /dev/null +++ ql/src/test/results/clientpositive/llap/cross_prod_1.q.out @@ -0,0 +1,2502 @@ +PREHOOK: query: create table X as +select distinct * from src order by key limit 10 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@X +POSTHOOK: query: create table X as +select distinct * from src order by key limit 10 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@X +POSTHOOK: Lineage: x.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: x.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +Warning: Shuffle Join MERGEJOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: explain select * from X as A, X as B order by A.key, B.key +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from X as A, X as B order by A.key, B.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + 
Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (XPROD_EDGE), Map 4 (XPROD_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 100 Data size: 73700 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col2 (type: string) + sort order: ++ + Statistics: Num rows: 100 Data size: 73700 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col3 (type: string) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select 
Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 100 Data size: 73700 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 73700 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join MERGEJOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: select * from X as A, X as B order by A.key, B.key +PREHOOK: type: QUERY +PREHOOK: Input: default@x +#### A masked pattern was here #### +POSTHOOK: query: select * from X as A, X as B order by A.key, B.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@x +#### A masked pattern was here #### +0 val_0 0 val_0 +0 val_0 10 val_10 +0 val_0 100 val_100 +0 val_0 103 val_103 +0 val_0 104 val_104 +0 val_0 105 val_105 +0 val_0 11 val_11 +0 val_0 111 val_111 +0 val_0 113 val_113 +0 val_0 114 val_114 +10 val_10 0 val_0 +10 val_10 10 val_10 +10 val_10 100 val_100 +10 val_10 103 val_103 +10 val_10 104 val_104 +10 val_10 105 val_105 +10 val_10 11 val_11 +10 val_10 111 val_111 +10 val_10 113 val_113 +10 val_10 114 val_114 +100 val_100 0 val_0 +100 val_100 10 val_10 +100 val_100 100 val_100 +100 val_100 103 val_103 +100 val_100 104 val_104 +100 val_100 105 val_105 +100 val_100 11 val_11 +100 val_100 111 val_111 +100 val_100 113 val_113 +100 val_100 114 val_114 +103 val_103 0 val_0 +103 val_103 10 val_10 +103 val_103 100 val_100 +103 val_103 103 val_103 +103 val_103 104 val_104 +103 val_103 105 val_105 +103 val_103 11 val_11 +103 val_103 111 val_111 +103 val_103 113 val_113 
+103 val_103 114 val_114 +104 val_104 0 val_0 +104 val_104 10 val_10 +104 val_104 100 val_100 +104 val_104 103 val_103 +104 val_104 104 val_104 +104 val_104 105 val_105 +104 val_104 11 val_11 +104 val_104 111 val_111 +104 val_104 113 val_113 +104 val_104 114 val_114 +105 val_105 0 val_0 +105 val_105 10 val_10 +105 val_105 100 val_100 +105 val_105 103 val_103 +105 val_105 104 val_104 +105 val_105 105 val_105 +105 val_105 11 val_11 +105 val_105 111 val_111 +105 val_105 113 val_113 +105 val_105 114 val_114 +11 val_11 0 val_0 +11 val_11 10 val_10 +11 val_11 100 val_100 +11 val_11 103 val_103 +11 val_11 104 val_104 +11 val_11 105 val_105 +11 val_11 11 val_11 +11 val_11 111 val_111 +11 val_11 113 val_113 +11 val_11 114 val_114 +111 val_111 0 val_0 +111 val_111 10 val_10 +111 val_111 100 val_100 +111 val_111 103 val_103 +111 val_111 104 val_104 +111 val_111 105 val_105 +111 val_111 11 val_11 +111 val_111 111 val_111 +111 val_111 113 val_113 +111 val_111 114 val_114 +113 val_113 0 val_0 +113 val_113 10 val_10 +113 val_113 100 val_100 +113 val_113 103 val_103 +113 val_113 104 val_104 +113 val_113 105 val_105 +113 val_113 11 val_11 +113 val_113 111 val_111 +113 val_113 113 val_113 +113 val_113 114 val_114 +114 val_114 0 val_0 +114 val_114 10 val_10 +114 val_114 100 val_100 +114 val_114 103 val_103 +114 val_114 104 val_104 +114 val_114 105 val_105 +114 val_114 11 val_11 +114 val_114 111 val_111 +114 val_114 113 val_113 +114 val_114 114 val_114 +Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: explain select * from X as A join X as B on A.key 4:boolean) -> 5:String - Statistics: Num rows: 25 Data size: 425 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 25 Data size: 425 Basic stats: COMPLETE Column stats: NONE - table: - input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -140,10 +115,10 @@ STAGE PLANS: enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: true + allNative: true + usesVectorUDFAdaptor: false vectorized: true - Map 2 + Map 3 Map Operator Tree: TableScan alias: tsint @@ -177,6 +152,28 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 25 Data size: 425 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: int), _col1 (type: int), _col3 (type: smallint), CASE WHEN (_col1 BETWEEN UDFToInteger(_col3) AND UDFToInteger(_col3)) THEN ('Ok') ELSE ('NoOk') END (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 25 Data size: 425 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 25 Data size: 425 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -184,7 +181,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 1' is a cross product +Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select tint.rnum, tsint.rnum, tint.cint, tsint.csint, (case when (tint.cint between tsint.csint and tsint.csint) then "Ok" else "NoOk" end) as between_col from tint , tsint PREHOOK: type: QUERY PREHOOK: Input: default@tint @@ -221,7 +218,7 @@ tint.rnum tsint.rnum tint.cint tsint.csint between_col 4 2 10 0 NoOk 4 3 10 1 NoOk 4 4 10 10 Ok -Warning: Map Join MAPJOIN[10][bigTable=?] in task 'Map 1' is a cross product +Warning: Shuffle Join MERGEJOIN[10][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain vectorization expression select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint PREHOOK: type: QUERY @@ -242,7 +239,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (XPROD_EDGE), Map 3 (XPROD_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -261,45 +258,14 @@ STAGE PLANS: native: true projectedOutputColumns: [0, 1] Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - Map Join Vectorization: - className: VectorMapJoinInnerMultiKeyOperator + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, 
Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 1 Map 2 - Statistics: Num rows: 25 Data size: 425 Basic stats: COMPLETE Column stats: NONE - Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: SelectColumnIsTrue(col 4)(children: VectorUDFAdaptor(_col1 BETWEEN UDFToInteger(_col3) AND UDFToInteger(_col3))(children: col 3, col 3) -> 4:boolean) -> boolean - predicate: _col1 BETWEEN UDFToInteger(_col3) AND UDFToInteger(_col3) (type: boolean) - Statistics: Num rows: 2 Data size: 34 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col2 (type: int), _col1 (type: int), _col3 (type: smallint) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0, 2, 1, 3] - Statistics: Num rows: 2 Data size: 34 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 2 Data size: 34 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -307,10 +273,10 @@ STAGE PLANS: enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: true + allNative: true + usesVectorUDFAdaptor: false vectorized: true - Map 2 + Map 3 Map Operator Tree: TableScan alias: tsint @@ -344,6 +310,31 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 25 Data size: 425 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col1 BETWEEN UDFToInteger(_col3) AND UDFToInteger(_col3) (type: boolean) + Statistics: Num rows: 2 Data size: 34 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: int), _col1 (type: int), _col3 (type: smallint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 34 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 34 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -351,7 +342,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[10][bigTable=?] 
in task 'Map 1' is a cross product +Warning: Shuffle Join MERGEJOIN[10][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint PREHOOK: type: QUERY PREHOOK: Input: default@tint diff --git ql/src/test/results/clientpositive/llap/vector_complex_all.q.out ql/src/test/results/clientpositive/llap/vector_complex_all.q.out index 2f3f886..2268a15 100644 --- ql/src/test/results/clientpositive/llap/vector_complex_all.q.out +++ ql/src/test/results/clientpositive/llap/vector_complex_all.q.out @@ -610,7 +610,7 @@ b str two line1 four line2 six line3 -Warning: Map Join MAPJOIN[15][bigTable=?] in task 'Map 4' is a cross product +Warning: Shuffle Join MERGEJOIN[15][tables = [$hdt$_1, $hdt$_2, $hdt$_3, $hdt$_0]] in Stage 'Reducer 2' is a cross product PREHOOK: query: EXPLAIN VECTORIZATION DETAIL INSERT INTO TABLE orc_create_complex SELECT orc_create_staging.*, src1.key FROM orc_create_staging cross join src src1 cross join orc_create_staging spam1 cross join orc_create_staging spam2 @@ -635,7 +635,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 4 <- Map 1 (BROADCAST_EDGE), Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (XPROD_EDGE), Map 3 (XPROD_EDGE), Map 4 (XPROD_EDGE), Map 5 (XPROD_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -678,7 +678,7 @@ STAGE PLANS: includeColumns: [0, 1, 2, 3] dataColumns: str:string, mp:map, lst:array, strct:struct partitionColumnCount: 0 - Map 2 + Map 3 Map Operator Tree: TableScan alias: spam2 @@ -715,7 +715,7 @@ STAGE PLANS: includeColumns: [] dataColumns: str:string, mp:map, lst:array, strct:struct partitionColumnCount: 0 - Map 3 + Map 4 Map Operator Tree: TableScan alias: spam1 @@ -752,7 +752,7 @@ STAGE PLANS: includeColumns: [] dataColumns: str:string, mp:map, lst:array, strct:struct partitionColumnCount: 0 - Map 4 + Map 5 Map Operator 
Tree: TableScan alias: src1 @@ -768,53 +768,23 @@ STAGE PLANS: native: true projectedOutputColumns: [0] Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - Inner Join 0 to 3 - keys: - 0 - 1 - 2 - 3 - Map Join Vectorization: - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: One MapJoin Condition IS false - outputColumnNames: _col0, _col1, _col2, _col3, _col6 - input vertices: - 0 Map 1 - 1 Map 2 - 2 Map 3 - Statistics: Num rows: 500 Data size: 1768000 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: map), _col2 (type: array), _col3 (type: struct), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0, 1, 2, 3, 4] - Statistics: Num rows: 500 Data size: 1768000 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 500 Data size: 1768000 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.orc_create_complex + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumns: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN 
[tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: [0] + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) Execution mode: vectorized, llap Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -822,7 +792,33 @@ STAGE PLANS: includeColumns: [0] dataColumns: key:string, value:string partitionColumnCount: 0 - scratchColumnTypeNames: string, map, array, struct + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + Inner Join 0 to 3 + keys: + 0 + 1 + 2 + 3 + outputColumnNames: _col0, _col1, _col2, _col3, _col6 + Statistics: Num rows: 500 Data size: 1768000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: map), _col2 (type: array), _col3 (type: struct), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 1768000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 1768000 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orc_create_complex Stage: Stage-2 Dependency Collection @@ -840,7 +836,7 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator -Warning: Map Join MAPJOIN[15][bigTable=?] 
in task 'Map 4' is a cross product +Warning: Shuffle Join MERGEJOIN[15][tables = [$hdt$_1, $hdt$_2, $hdt$_3, $hdt$_0]] in Stage 'Reducer 2' is a cross product PREHOOK: query: INSERT INTO TABLE orc_create_complex SELECT orc_create_staging.*, src1.key FROM orc_create_staging cross join src src1 cross join orc_create_staging spam1 cross join orc_create_staging spam2 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out index e43b4d1..e644f14 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out @@ -1,4 +1,4 @@ -Warning: Map Join MAPJOIN[27][bigTable=?] in task 'Map 1' is a cross product +Warning: Shuffle Join MERGEJOIN[27][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain vectorization expression select * from src @@ -26,10 +26,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Reducer 4 (BROADCAST_EDGE), Reducer 5 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 5 (XPROD_EDGE), Reducer 6 (BROADCAST_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -48,58 +48,14 @@ STAGE PLANS: native: true projectedOutputColumns: [0, 1] Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - Map Join Vectorization: - className: VectorMapJoinInnerMultiKeyOperator + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 1 Reducer 4 - Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Map Join Vectorization: - className: VectorMapJoinOuterStringOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - outputColumnNames: _col0, _col1, _col2, _col3, _col5 - input vertices: - 1 Reducer 5 - Statistics: Num rows: 500 Data size: 98620 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 2, val 0) -> boolean, FilterExprAndExpr(children: SelectColumnIsNull(col 4) -> boolean, SelectColumnIsNotNull(col 0) -> boolean, FilterLongColGreaterEqualLongColumn(col 3, col 2) -> boolean) -> boolean) -> boolean - predicate: ((_col2 = 0) or (_col5 is null and _col0 is not null and (_col3 >= _col2))) (type: boolean) - Statistics: Num rows: 500 Data size: 98620 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0, 1] - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce 
Output Operator - key expressions: _col0 (type: string) - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: @@ -110,7 +66,7 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Map 3 + Map 4 Map Operator Tree: TableScan alias: src @@ -180,6 +136,39 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col5 + input vertices: + 1 Reducer 6 + Statistics: Num rows: 500 Data size: 98620 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((_col2 = 0) or (_col5 is null and _col0 is not null and (_col3 >= _col2))) (type: boolean) + Statistics: Num rows: 500 Data size: 98620 Basic 
stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -207,7 +196,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reducer 5 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -238,7 +227,7 @@ STAGE PLANS: nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) - Reducer 5 + Reducer 6 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -287,7 +276,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[27][bigTable=?] 
in task 'Map 1' is a cross product +Warning: Shuffle Join MERGEJOIN[27][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select * from src where not key in @@ -316,7 +305,7 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@orcsrc POSTHOOK: Lineage: orcsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: orcsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -Warning: Map Join MAPJOIN[27][bigTable=?] in task 'Map 1' is a cross product +Warning: Shuffle Join MERGEJOIN[27][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select * from orcsrc where not key in @@ -333,7 +322,7 @@ order by key POSTHOOK: type: QUERY POSTHOOK: Input: default@orcsrc #### A masked pattern was here #### -Warning: Map Join MAPJOIN[27][bigTable=?] in task 'Map 1' is a cross product +Warning: Shuffle Join MERGEJOIN[27][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select * from orcsrc where not key in diff --git ql/src/test/results/clientpositive/llap/vector_join_filters.q.out ql/src/test/results/clientpositive/llap/vector_join_filters.q.out index 1a492b6..4e5205f 100644 --- ql/src/test/results/clientpositive/llap/vector_join_filters.q.out +++ ql/src/test/results/clientpositive/llap/vector_join_filters.q.out @@ -26,7 +26,7 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@myinput1 POSTHOOK: Lineage: myinput1.key SIMPLE [(myinput1_txt)myinput1_txt.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: myinput1.value SIMPLE [(myinput1_txt)myinput1_txt.FieldSchema(name:value, type:int, comment:null), ] -Warning: Map Join MAPJOIN[18][bigTable=?] 
in task 'Map 1' is a cross product +Warning: Shuffle Join MERGEJOIN[18][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 diff --git ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out index 3497164..056360f 100644 --- ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out +++ ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out @@ -26,7 +26,7 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@myinput1 POSTHOOK: Lineage: myinput1.key SIMPLE [(myinput1_txt)myinput1_txt.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: myinput1.value SIMPLE [(myinput1_txt)myinput1_txt.FieldSchema(name:value, type:int, comment:null), ] -Warning: Map Join MAPJOIN[14][bigTable=?] 
in task 'Map 1' is a cross product +Warning: Shuffle Join MERGEJOIN[14][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 diff --git ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out index 16cae79..7a4fe36 100644 --- ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out @@ -2375,7 +2375,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 4 (XPROD_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) Reducer 4 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### @@ -2524,7 +2524,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (XPROD_EDGE), Map 4 (XPROD_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -5507,7 +5507,7 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 #### A masked pattern was here #### 1000 -Warning: Map Join MAPJOIN[22][bigTable=?] 
in task 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08' PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08' @@ -5525,8 +5525,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 3 <- Map 1 (BROADCAST_EDGE), Map 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 4 (XPROD_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -5540,24 +5541,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - Map 2 - Map Operator Tree: - TableScan - alias: srcpart - filterExpr: (ds = '2008-04-08') (type: boolean) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: '2008-04-08' (type: string) mode: hash @@ -5578,6 +5561,25 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + 
Statistics: Num rows: 500000 Data size: 11124000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -5589,30 +5591,17 @@ STAGE PLANS: vectorized: true Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string) + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - input vertices: - 0 Map 1 - Statistics: Num rows: 500000 Data size: 11124000 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: @@ -5624,17 +5613,15 @@ STAGE PLANS: vectorized: true Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + 
keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -5642,7 +5629,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[22][bigTable=?] in task 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08' PREHOOK: type: QUERY PREHOOK: Input: default@srcpart diff --git ql/src/test/results/clientpositive/llap/vectorized_multi_output_select.q.out ql/src/test/results/clientpositive/llap/vectorized_multi_output_select.q.out index 2c66856..052fda6 100644 --- ql/src/test/results/clientpositive/llap/vectorized_multi_output_select.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_multi_output_select.q.out @@ -1,4 +1,4 @@ -Warning: Map Join MAPJOIN[43][bigTable=?] 
in task 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[43][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select * from ( select count(*) as h8_30_to_9 @@ -32,9 +32,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 4 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 3 (BROADCAST_EDGE) - Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Map 5 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (XPROD_EDGE), Reducer 4 (XPROD_EDGE) + Reducer 4 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -56,7 +57,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) input vertices: - 1 Map 4 + 1 Map 5 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() @@ -74,7 +75,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) input vertices: - 1 Map 5 + 1 Map 6 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() @@ -87,7 +88,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: src1 @@ -106,7 +107,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 177 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 5 + Map 6 Map Operator Tree: TableScan alias: src1 @@ -133,24 +134,29 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1 - input vertices: - 1 Reducer 3 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - File Output 
Operator - compressed: false - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -169,7 +175,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[43][bigTable=?] 
in task 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[43][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product PREHOOK: query: select * from ( select count(*) as h8_30_to_9 from src diff --git ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out index 4dfcc33..a709920 100644 --- ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out +++ ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out @@ -1215,7 +1215,7 @@ POSTHOOK: Lineage: decimal_mapjoin.cdecimal1 EXPRESSION [(alltypesorc)alltypesor POSTHOOK: Lineage: decimal_mapjoin.cdecimal2 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_mapjoin.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_mapjoin.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] -Warning: Map Join MAPJOIN[13][bigTable=?] 
in task 'Map 1' is a cross product +Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 FROM decimal_mapjoin l JOIN decimal_mapjoin r ON l.cint = r.cint @@ -1235,7 +1235,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (XPROD_EDGE), Map 3 (XPROD_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1250,29 +1250,12 @@ STAGE PLANS: expressions: cdecimal1 (type: decimal(20,10)) outputColumnNames: _col0 Statistics: Num rows: 5 Data size: 551 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col2 - input vertices: - 1 Map 2 - Statistics: Num rows: 25 Data size: 5535 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 6981 (type: int), 6981 (type: int), _col0 (type: decimal(20,10)), _col2 (type: decimal(23,14)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 25 Data size: 5535 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 25 Data size: 5535 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + sort order: + Statistics: Num rows: 5 Data size: 551 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: decimal(20,10)) Execution mode: vectorized - Map 2 + Map 3 Map Operator Tree: TableScan alias: r @@ -1289,6 +1272,27 @@ STAGE PLANS: Statistics: Num rows: 5 Data size: 551 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(23,14)) Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Merge Join 
Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col2 + Statistics: Num rows: 25 Data size: 5535 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 6981 (type: int), 6981 (type: int), _col0 (type: decimal(20,10)), _col2 (type: decimal(23,14)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 25 Data size: 5535 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 25 Data size: 5535 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1296,7 +1300,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Map 1' is a cross product +Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 FROM decimal_mapjoin l JOIN decimal_mapjoin r ON l.cint = r.cint @@ -1312,8 +1316,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_mapjoin #### A masked pattern was here #### 6981 6981 NULL NULL -6981 6981 NULL -617.56077692307690 -6981 6981 NULL -617.56077692307690 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL @@ -1321,14 +1323,13 @@ POSTHOOK: Input: default@decimal_mapjoin 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL -6981 6981 NULL NULL 6981 6981 NULL -617.56077692307690 6981 6981 NULL -617.56077692307690 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL -6981 6981 NULL 6984454.21109769200000 6981 6981 NULL NULL +6981 6981 NULL 6984454.21109769200000 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL @@ -1337,8 +1338,8 @@ POSTHOOK: Input: default@decimal_mapjoin 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 
NULL NULL -6981 6981 NULL 6984454.21109769200000 6981 6981 NULL NULL +6981 6981 NULL 6984454.21109769200000 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL @@ -1347,13 +1348,14 @@ POSTHOOK: Input: default@decimal_mapjoin 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL +6981 6981 NULL NULL 6981 6981 NULL 6984454.21109769200000 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL +6981 6981 NULL -617.56077692307690 +6981 6981 NULL -617.56077692307690 6981 6981 5831542.2692483780 NULL -6981 6981 5831542.2692483780 -617.56077692307690 -6981 6981 5831542.2692483780 -617.56077692307690 6981 6981 5831542.2692483780 NULL 6981 6981 5831542.2692483780 NULL 6981 6981 5831542.2692483780 NULL @@ -1361,9 +1363,9 @@ POSTHOOK: Input: default@decimal_mapjoin 6981 6981 5831542.2692483780 NULL 6981 6981 5831542.2692483780 NULL 6981 6981 5831542.2692483780 NULL +6981 6981 5831542.2692483780 -617.56077692307690 +6981 6981 5831542.2692483780 -617.56077692307690 6981 6981 NULL NULL -6981 6981 NULL -617.56077692307690 -6981 6981 NULL -617.56077692307690 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL @@ -1371,14 +1373,13 @@ POSTHOOK: Input: default@decimal_mapjoin 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL -6981 6981 NULL NULL 6981 6981 NULL -617.56077692307690 6981 6981 NULL -617.56077692307690 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL -6981 6981 NULL 6984454.21109769200000 6981 6981 NULL NULL +6981 6981 NULL 6984454.21109769200000 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL @@ -1387,13 +1388,14 @@ POSTHOOK: Input: default@decimal_mapjoin 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL +6981 6981 NULL NULL 6981 6981 NULL 6984454.21109769200000 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL +6981 6981 NULL -617.56077692307690 +6981 6981 NULL -617.56077692307690 6981 6981 -515.6210729730 NULL -6981 6981 -515.6210729730 -617.56077692307690 -6981 6981 -515.6210729730 
-617.56077692307690 6981 6981 -515.6210729730 NULL 6981 6981 -515.6210729730 NULL 6981 6981 -515.6210729730 NULL @@ -1401,17 +1403,19 @@ POSTHOOK: Input: default@decimal_mapjoin 6981 6981 -515.6210729730 NULL 6981 6981 -515.6210729730 NULL 6981 6981 -515.6210729730 NULL -6981 6981 -515.6210729730 NULL 6981 6981 -515.6210729730 -617.56077692307690 6981 6981 -515.6210729730 -617.56077692307690 6981 6981 -515.6210729730 NULL 6981 6981 -515.6210729730 NULL 6981 6981 -515.6210729730 NULL +6981 6981 -515.6210729730 NULL 6981 6981 -515.6210729730 6984454.21109769200000 6981 6981 -515.6210729730 NULL 6981 6981 -515.6210729730 NULL 6981 6981 -515.6210729730 NULL -Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Map 1' is a cross product +6981 6981 -515.6210729730 -617.56077692307690 +6981 6981 -515.6210729730 -617.56077692307690 +Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 FROM decimal_mapjoin l JOIN decimal_mapjoin r ON l.cint = r.cint @@ -1431,7 +1435,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (XPROD_EDGE), Map 3 (XPROD_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1446,29 +1450,12 @@ STAGE PLANS: expressions: cdecimal1 (type: decimal(20,10)) outputColumnNames: _col0 Statistics: Num rows: 5 Data size: 551 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col2 - input vertices: - 1 Map 2 - Statistics: Num rows: 25 Data size: 5535 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 6981 (type: int), 6981 (type: int), _col0 (type: decimal(20,10)), _col2 (type: decimal(23,14)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 25 Data size: 5535 Basic stats: COMPLETE Column stats: NONE - File Output Operator - 
compressed: false - Statistics: Num rows: 25 Data size: 5535 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + sort order: + Statistics: Num rows: 5 Data size: 551 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: decimal(20,10)) Execution mode: vectorized - Map 2 + Map 3 Map Operator Tree: TableScan alias: r @@ -1485,6 +1472,27 @@ STAGE PLANS: Statistics: Num rows: 5 Data size: 551 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(23,14)) Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col2 + Statistics: Num rows: 25 Data size: 5535 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 6981 (type: int), 6981 (type: int), _col0 (type: decimal(20,10)), _col2 (type: decimal(23,14)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 25 Data size: 5535 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 25 Data size: 5535 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1492,7 +1500,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[13][bigTable=?] 
in task 'Map 1' is a cross product +Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 FROM decimal_mapjoin l JOIN decimal_mapjoin r ON l.cint = r.cint @@ -1508,8 +1516,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_mapjoin #### A masked pattern was here #### 6981 6981 NULL NULL -6981 6981 NULL -617.56077692307690 -6981 6981 NULL -617.56077692307690 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL @@ -1517,14 +1523,13 @@ POSTHOOK: Input: default@decimal_mapjoin 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL -6981 6981 NULL NULL 6981 6981 NULL -617.56077692307690 6981 6981 NULL -617.56077692307690 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL -6981 6981 NULL 6984454.21109769200000 6981 6981 NULL NULL +6981 6981 NULL 6984454.21109769200000 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL @@ -1533,8 +1538,8 @@ POSTHOOK: Input: default@decimal_mapjoin 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL -6981 6981 NULL 6984454.21109769200000 6981 6981 NULL NULL +6981 6981 NULL 6984454.21109769200000 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL @@ -1543,13 +1548,14 @@ POSTHOOK: Input: default@decimal_mapjoin 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL +6981 6981 NULL NULL 6981 6981 NULL 6984454.21109769200000 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL +6981 6981 NULL -617.56077692307690 +6981 6981 NULL -617.56077692307690 6981 6981 5831542.2692483780 NULL -6981 6981 5831542.2692483780 -617.56077692307690 -6981 6981 5831542.2692483780 -617.56077692307690 6981 6981 5831542.2692483780 NULL 6981 6981 5831542.2692483780 NULL 6981 6981 5831542.2692483780 NULL @@ -1557,9 +1563,9 @@ POSTHOOK: Input: default@decimal_mapjoin 6981 6981 5831542.2692483780 NULL 6981 6981 5831542.2692483780 NULL 6981 6981 5831542.2692483780 NULL +6981 6981 
5831542.2692483780 -617.56077692307690 +6981 6981 5831542.2692483780 -617.56077692307690 6981 6981 NULL NULL -6981 6981 NULL -617.56077692307690 -6981 6981 NULL -617.56077692307690 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL @@ -1567,14 +1573,13 @@ POSTHOOK: Input: default@decimal_mapjoin 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL -6981 6981 NULL NULL 6981 6981 NULL -617.56077692307690 6981 6981 NULL -617.56077692307690 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL -6981 6981 NULL 6984454.21109769200000 6981 6981 NULL NULL +6981 6981 NULL 6984454.21109769200000 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL @@ -1583,13 +1588,14 @@ POSTHOOK: Input: default@decimal_mapjoin 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL +6981 6981 NULL NULL 6981 6981 NULL 6984454.21109769200000 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL +6981 6981 NULL -617.56077692307690 +6981 6981 NULL -617.56077692307690 6981 6981 -515.6210729730 NULL -6981 6981 -515.6210729730 -617.56077692307690 -6981 6981 -515.6210729730 -617.56077692307690 6981 6981 -515.6210729730 NULL 6981 6981 -515.6210729730 NULL 6981 6981 -515.6210729730 NULL @@ -1597,16 +1603,18 @@ POSTHOOK: Input: default@decimal_mapjoin 6981 6981 -515.6210729730 NULL 6981 6981 -515.6210729730 NULL 6981 6981 -515.6210729730 NULL -6981 6981 -515.6210729730 NULL 6981 6981 -515.6210729730 -617.56077692307690 6981 6981 -515.6210729730 -617.56077692307690 6981 6981 -515.6210729730 NULL 6981 6981 -515.6210729730 NULL 6981 6981 -515.6210729730 NULL +6981 6981 -515.6210729730 NULL 6981 6981 -515.6210729730 6984454.21109769200000 6981 6981 -515.6210729730 NULL 6981 6981 -515.6210729730 NULL 6981 6981 -515.6210729730 NULL +6981 6981 -515.6210729730 -617.56077692307690 +6981 6981 -515.6210729730 -617.56077692307690 PREHOOK: query: DROP TABLE decimal_mapjoin PREHOOK: type: DROPTABLE PREHOOK: Input: default@decimal_mapjoin