diff --git common/src/java/org/apache/hadoop/hive/common/jsonexplain/Vertex.java common/src/java/org/apache/hadoop/hive/common/jsonexplain/Vertex.java index b7dc88c..a73893f 100644 --- common/src/java/org/apache/hadoop/hive/common/jsonexplain/Vertex.java +++ common/src/java/org/apache/hadoop/hive/common/jsonexplain/Vertex.java @@ -74,7 +74,7 @@ public VertexType vertexType; public static enum EdgeType { - BROADCAST, SHUFFLE, MULTICAST, PARTITION_ONLY_SHUFFLE, FORWARD, UNKNOWN + BROADCAST, SHUFFLE, MULTICAST, PARTITION_ONLY_SHUFFLE, FORWARD, XPROD_EDGE, UNKNOWN }; public String edgeType; diff --git common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/TezJsonParser.java common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/TezJsonParser.java index 69e5358..b6cca10 100644 --- common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/TezJsonParser.java +++ common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/TezJsonParser.java @@ -36,6 +36,8 @@ public String mapEdgeType(String edgeName) { return "MULTICAST"; case "ONE_TO_ONE_EDGE": return "FORWARD"; + case "XPROD_EDGE": + return "XPROD_EDGE"; default: return "UNKNOWN"; } diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index a6ecb37..59dc333 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -3027,6 +3027,8 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal 0.5f, "The maximum fraction of JVM memory which Tez will reserve for the processor"), TEZ_TASK_SCALE_MEMORY_RESERVE_FRACTION("hive.tez.task.scale.memory.reserve.fraction", -1f, "The customized fraction of JVM memory which Tez will reserve for the processor"), + TEZ_CARTESIAN_PRODUCT_EDGE_ENABLED("hive.tez.cartesian-product.enabled", + false, "Use Tez cartesian product edge to speed up cross product"), // The default is different on the client and server, so it's null here. 
LLAP_IO_ENABLED("hive.llap.io.enabled", null, "Whether the LLAP IO layer is enabled."), LLAP_IO_TRACE_SIZE("hive.llap.io.trace.size", "2Mb", diff --git data/conf/llap/hive-site.xml data/conf/llap/hive-site.xml index 870b584..8cd5144 100644 --- data/conf/llap/hive-site.xml +++ data/conf/llap/hive-site.xml @@ -338,4 +338,9 @@ true + + hive.tez.cartesian-product.enabled + true + + diff --git data/conf/tez/hive-site.xml data/conf/tez/hive-site.xml index 35e8c99..f1dabf5 100644 --- data/conf/tez/hive-site.xml +++ data/conf/tez/hive-site.xml @@ -283,4 +283,9 @@ true + + hive.tez.cartesian-product.enabled + true + + diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index a081638..c338826 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -139,6 +139,9 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\ count_dist_rewrite.q,\ create_merge_compressed.q,\ cross_join.q,\ + cross_prod_1.q,\ + cross_prod_3.q,\ + cross_prod_4.q,\ cross_product_check_1.q,\ cross_product_check_2.q,\ ctas.q,\ @@ -508,6 +511,9 @@ minillaplocal.query.files=\ correlationoptimizer4.q,\ correlationoptimizer6.q,\ disable_merge_for_bucketing.q,\ + cross_prod_1.q,\ + cross_prod_3.q,\ + cross_prod_4.q,\ dynamic_partition_pruning.q,\ dynamic_semijoin_reduction.q,\ dynamic_semijoin_reduction_2.q,\ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java index aae3480..5c338b8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java @@ -46,6 +46,9 @@ import org.apache.tez.mapreduce.common.MRInputSplitDistributor; import org.apache.tez.mapreduce.hadoop.InputSplitInfo; import org.apache.tez.mapreduce.protos.MRRuntimeProtos; +import org.apache.tez.runtime.library.api.Partitioner; +import org.apache.tez.runtime.library.cartesianproduct.CartesianProductConfig; +import org.apache.tez.runtime.library.cartesianproduct.CartesianProductEdgeManager; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; @@ -135,6 +138,7 @@ import org.apache.tez.runtime.library.conf.UnorderedKVEdgeConfig; import org.apache.tez.runtime.library.conf.UnorderedPartitionedKVEdgeConfig; import org.apache.tez.runtime.library.input.ConcatenatedMergedKeyValueInput; +import org.apache.tez.runtime.library.cartesianproduct.CartesianProductVertexManager; /** * DagUtils. 
DagUtils is a collection of helper methods to convert @@ -264,7 +268,7 @@ private JobConf initializeVertexConf(JobConf baseConf, Context context, MapWork */ @SuppressWarnings("rawtypes") public GroupInputEdge createEdge(VertexGroup group, JobConf vConf, Vertex w, - TezEdgeProperty edgeProp, VertexType vertexType) + TezEdgeProperty edgeProp, BaseWork work, TezWork tezWork) throws IOException { Class mergeInputClass; @@ -279,7 +283,8 @@ public GroupInputEdge createEdge(VertexGroup group, JobConf vConf, Vertex w, case CUSTOM_EDGE: { mergeInputClass = ConcatenatedMergedKeyValueInput.class; int numBuckets = edgeProp.getNumBuckets(); - CustomVertexConfiguration vertexConf = new CustomVertexConfiguration(numBuckets, vertexType); + CustomVertexConfiguration vertexConf + = new CustomVertexConfiguration(numBuckets, tezWork.getVertexType(work)); DataOutputBuffer dob = new DataOutputBuffer(); vertexConf.write(dob); VertexManagerPluginDescriptor desc = @@ -299,6 +304,10 @@ public GroupInputEdge createEdge(VertexGroup group, JobConf vConf, Vertex w, mergeInputClass = ConcatenatedMergedKeyValueInput.class; break; + case XPROD_EDGE: + mergeInputClass = ConcatenatedMergedKeyValueInput.class; + break; + case SIMPLE_EDGE: setupAutoReducerParallelism(edgeProp, w); // fall through @@ -308,7 +317,7 @@ public GroupInputEdge createEdge(VertexGroup group, JobConf vConf, Vertex w, break; } - return GroupInputEdge.create(group, w, createEdgeProperty(edgeProp, vConf), + return GroupInputEdge.create(group, w, createEdgeProperty(w, edgeProp, vConf, work, tezWork), InputDescriptor.create(mergeInputClass.getName())); } @@ -322,13 +331,14 @@ public GroupInputEdge createEdge(VertexGroup group, JobConf vConf, Vertex w, * @return */ public Edge createEdge(JobConf vConf, Vertex v, Vertex w, TezEdgeProperty edgeProp, - VertexType vertexType) + BaseWork work, TezWork tezWork) throws IOException { switch(edgeProp.getEdgeType()) { case CUSTOM_EDGE: { int numBuckets = edgeProp.getNumBuckets(); - CustomVertexConfiguration vertexConf = new CustomVertexConfiguration(numBuckets, vertexType); + CustomVertexConfiguration vertexConf = + new CustomVertexConfiguration(numBuckets, tezWork.getVertexType(work)); DataOutputBuffer dob = new DataOutputBuffer(); vertexConf.write(dob); VertexManagerPluginDescriptor desc = VertexManagerPluginDescriptor.create( @@ -339,6 +349,9 @@ public Edge createEdge(JobConf vConf, Vertex v, Vertex w, TezEdgeProperty edgePr w.setVertexManagerPlugin(desc); break; } + case XPROD_EDGE: + break; + case SIMPLE_EDGE: { setupAutoReducerParallelism(edgeProp, w); break; @@ -352,14 +365,15 @@ public Edge createEdge(JobConf vConf, Vertex v, Vertex w, TezEdgeProperty edgePr // nothing } - return Edge.create(v, w, createEdgeProperty(edgeProp, vConf)); + return Edge.create(v, w, createEdgeProperty(w, edgeProp, vConf, work, tezWork)); } /* * Helper function to create an edge property from an edge type. 
   */
-  private EdgeProperty createEdgeProperty(TezEdgeProperty edgeProp, Configuration conf)
-      throws IOException {
+  private EdgeProperty createEdgeProperty(Vertex w, TezEdgeProperty edgeProp,
+      Configuration conf, BaseWork work, TezWork tezWork)
+      throws IOException {
     MRHelpers.translateMRConfToTez(conf);
     String keyClass = conf.get(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS);
     String valClass = conf.get(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS);
@@ -412,7 +426,23 @@ private EdgeProperty createEdgeProperty(TezEdgeProperty edgeProp, Configuration
           .setValueSerializationClass(TezBytesWritableSerialization.class.getName(), null)
           .build();
       return et4Conf.createDefaultOneToOneEdgeProperty();
+    case XPROD_EDGE:
+      EdgeManagerPluginDescriptor edgeManagerDescriptor =
+          EdgeManagerPluginDescriptor.create(CartesianProductEdgeManager.class.getName());
+      List<String> crossProductSources = new ArrayList<>();
+      for (BaseWork parentWork : tezWork.getParents(work)) {
+        if (EdgeType.XPROD_EDGE == tezWork.getEdgeType(parentWork, work)) {
+          crossProductSources.add(parentWork.getName());
+        }
+      }
+      CartesianProductConfig cpConfig = new CartesianProductConfig(crossProductSources);
+      edgeManagerDescriptor.setUserPayload(cpConfig.toUserPayload(new TezConfiguration(conf)));
+      UnorderedPartitionedKVEdgeConfig cpEdgeConf =
+          UnorderedPartitionedKVEdgeConfig.newBuilder(keyClass, valClass,
+              ValueHashPartitioner.class.getName()).build();
+      return cpEdgeConf.createDefaultCustomEdgeProperty(edgeManagerDescriptor);
     case SIMPLE_EDGE:
+      // fallthrough
     default:
       assert partitionerClassName != null;
       partitionerConf = createPartitionerConf(partitionerClassName, conf);
@@ -427,6 +457,14 @@ private EdgeProperty createEdgeProperty(TezEdgeProperty edgeProp, Configuration
     }
   }
 
+  public static class ValueHashPartitioner implements Partitioner {
+
+    @Override
+    public int getPartition(Object key, Object value, int numPartitions) {
+      return (value.hashCode() & 2147483647) % numPartitions;
+    }
+  }
+
   /**
    * Utility method to create a stripped down configuration for the MR partitioner.
    *
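A note on the XPROD_EDGE branch and ValueHashPartitioner above: a cross product has no join keys to hash, so records are spread across downstream tasks by the hash of the value instead, and the mask with 2147483647 (Integer.MAX_VALUE) clears the sign bit so a negative hashCode() cannot produce a negative partition index. A minimal standalone sketch of that arithmetic; the demo class and sample payloads are illustrative and not part of the patch:

    // Illustrative demo of the partition math in DagUtils.ValueHashPartitioner.
    public class ValueHashPartitionDemo {
      static int partition(Object value, int numPartitions) {
        // Clearing the sign bit keeps the result in [0, numPartitions).
        return (value.hashCode() & Integer.MAX_VALUE) % numPartitions;
      }

      public static void main(String[] args) {
        System.out.println(partition("row-payload-a", 8));
        System.out.println(partition("row-payload-b", 8));
      }
    }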
@@ -1240,6 +1278,21 @@ public Vertex createVertex(JobConf conf, BaseWork work,
     } else if (work instanceof MergeJoinWork) {
       v = createVertex(conf, (MergeJoinWork) work, appJarLr,
           additionalLr, fileSystem, scratchDir, ctx, vertexType);
+      // set VertexManagerPlugin if this is a cross product destination vertex
+      List<String> crossProductSources = new ArrayList<>();
+      for (BaseWork parentWork : tezWork.getParents(work)) {
+        if (tezWork.getEdgeType(parentWork, work) == EdgeType.XPROD_EDGE) {
+          crossProductSources.add(parentWork.getName());
+        }
+      }
+
+      if (!crossProductSources.isEmpty()) {
+        CartesianProductConfig cpConfig = new CartesianProductConfig(crossProductSources);
+        v.setVertexManagerPlugin(
+            VertexManagerPluginDescriptor.create(CartesianProductVertexManager.class.getName())
+                .setUserPayload(cpConfig.toUserPayload(new TezConfiguration(conf))));
+        // parallelism shouldn't be set for cartesian product vertex
+      }
     } else {
       // something is seriously wrong if this is happening
       throw new HiveException(ErrorMsg.GENERIC_ERROR.getErrorCodedMsg());
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
index c3a2a2b..a1b7cfb 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
@@ -477,7 +477,7 @@ DAG build(JobConf conf, TezWork work, Path scratchDir,
       for (BaseWork v: children) {
         // finally we can create the grouped edge
         GroupInputEdge e = utils.createEdge(group, parentConf,
-            workToVertex.get(v), work.getEdgeProperty(w, v), work.getVertexType(v));
+            workToVertex.get(v), work.getEdgeProperty(w, v), v, work);
 
         dag.addEdge(e);
       }
@@ -506,8 +506,7 @@ DAG build(JobConf conf, TezWork work, Path scratchDir,
         Edge e = null;
 
         TezEdgeProperty edgeProp = work.getEdgeProperty(w, v);
-
-        e = utils.createEdge(wxConf, wx, workToVertex.get(v), edgeProp, work.getVertexType(v));
+        e = utils.createEdge(wxConf, wx, workToVertex.get(v), edgeProp, v, work);
         dag.addEdge(e);
       }
     }
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
index 53d34bb..9175597 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
@@ -102,6 +102,14 @@
     MemoryMonitorInfo memoryMonitorInfo = getMemoryMonitorInfo(maxSize, context.conf);
     joinOp.getConf().setMemoryMonitorInfo(memoryMonitorInfo);
 
+    // do not use map join in case of cross product
+    boolean cartesianProductEdgeEnabled =
+        HiveConf.getBoolVar(context.conf, HiveConf.ConfVars.TEZ_CARTESIAN_PRODUCT_EDGE_ENABLED);
+    if (cartesianProductEdgeEnabled && !hasOuterJoin(joinOp) && isCrossProduct(joinOp)) {
+      fallbackToMergeJoin(joinOp, context);
+      return null;
+    }
+
     TezBucketJoinProcCtx tezBucketJoinProcCtx = new TezBucketJoinProcCtx(context.conf);
     boolean hiveConvertJoin = context.conf.getBoolVar(HiveConf.ConfVars.HIVECONVERTJOIN)
         & !context.parseContext.getDisableMapJoin();
@@ -614,6 +622,42 @@ private boolean checkColEquality(List<Set<String>> grandParentColNames,
     return false;
   }
 
+  private boolean hasOuterJoin(JoinOperator joinOp) throws SemanticException {
+    boolean hasOuter = false;
+    for (JoinCondDesc joinCondDesc : joinOp.getConf().getConds()) {
+      switch (joinCondDesc.getType()) {
+      case JoinDesc.INNER_JOIN:
+      case JoinDesc.LEFT_SEMI_JOIN:
+      case JoinDesc.UNIQUE_JOIN:
+        hasOuter = false;
+        break;
+
+      case JoinDesc.FULL_OUTER_JOIN:
+      case
JoinDesc.LEFT_OUTER_JOIN: + case JoinDesc.RIGHT_OUTER_JOIN: + hasOuter = true; + break; + + default: + throw new SemanticException("Unknown join type " + joinCondDesc.getType()); + } + } + return hasOuter; + } + + private boolean isCrossProduct(JoinOperator joinOp) { + ExprNodeDesc[][] joinExprs = joinOp.getConf().getJoinKeys(); + if (joinExprs != null) { + for (ExprNodeDesc[] expr : joinExprs) { + if (expr != null && expr.length != 0) { + return false; + } + } + } + + return true; + } + /** * Obtain big table position for join. * @@ -639,26 +683,7 @@ public int getMapJoinConversionPos(JoinOperator joinOp, OptimizeTezProcContext c * case this for now. */ if (joinOp.getConf().getConds().length > 1) { - boolean hasOuter = false; - for (JoinCondDesc joinCondDesc : joinOp.getConf().getConds()) { - switch (joinCondDesc.getType()) { - case JoinDesc.INNER_JOIN: - case JoinDesc.LEFT_SEMI_JOIN: - case JoinDesc.UNIQUE_JOIN: - hasOuter = false; - break; - - case JoinDesc.FULL_OUTER_JOIN: - case JoinDesc.LEFT_OUTER_JOIN: - case JoinDesc.RIGHT_OUTER_JOIN: - hasOuter = true; - break; - - default: - throw new SemanticException("Unknown join type " + joinCondDesc.getType()); - } - } - if (hasOuter) { + if (hasOuterJoin(joinOp)) { return -1; } } @@ -1100,14 +1125,19 @@ private void fallbackToReduceSideJoin(JoinOperator joinOp, OptimizeTezProcContex } } + // we are just converting to a common merge join operator. The shuffle + // join in map-reduce case. + fallbackToMergeJoin(joinOp, context); + } + + private void fallbackToMergeJoin(JoinOperator joinOp, OptimizeTezProcContext context) + throws SemanticException { int pos = getMapJoinConversionPos(joinOp, context, estimateNumBuckets(joinOp, false), true, Long.MAX_VALUE, false); if (pos < 0) { LOG.info("Could not get a valid join position. Defaulting to position 0"); pos = 0; } - // we are just converting to a common merge join operator. The shuffle - // join in map-reduce case. LOG.info("Fallback to common merge join operator"); convertJoinSMBJoin(joinOp, context, pos, 0, false); } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CrossProductCheck.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CrossProductCheck.java deleted file mode 100644 index 4b35bb6..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CrossProductCheck.java +++ /dev/null @@ -1,368 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
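The hasOuterJoin/isCrossProduct pair added above is what gates the new path: map-join conversion is skipped, and the plan falls back to a merge join, only when the join is a genuine cross product and has no outer side. A self-contained restatement of the cross-product test, using plain strings in place of ExprNodeDesc (a sketch only, not the patch's code):

    import java.util.Arrays;
    import java.util.List;

    // Restates ConvertJoinMapJoin.isCrossProduct: a join is a cross product
    // iff every input's join-key expression list is null or empty.
    public class CrossProductRuleSketch {
      static boolean isCrossProduct(List<List<String>> joinKeysPerInput) {
        if (joinKeysPerInput != null) {
          for (List<String> keys : joinKeysPerInput) {
            if (keys != null && !keys.isEmpty()) {
              return false; // at least one equality key => not a cross product
            }
          }
        }
        return true;
      }

      public static void main(String[] args) {
        System.out.println(isCrossProduct(
            Arrays.asList(Arrays.asList(), Arrays.asList())));                    // true
        System.out.println(isCrossProduct(
            Arrays.asList(Arrays.asList("a.key"), Arrays.asList("b.key"))));     // false
      }
    }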
- */ - -package org.apache.hadoop.hive.ql.optimizer.physical; - -import java.io.Serializable; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.Iterator; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.Stack; -import java.util.TreeMap; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator; -import org.apache.hadoop.hive.ql.exec.ConditionalTask; -import org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator; -import org.apache.hadoop.hive.ql.exec.JoinOperator; -import org.apache.hadoop.hive.ql.exec.MapJoinOperator; -import org.apache.hadoop.hive.ql.exec.Operator; -import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; -import org.apache.hadoop.hive.ql.exec.TableScanOperator; -import org.apache.hadoop.hive.ql.exec.Task; -import org.apache.hadoop.hive.ql.exec.mr.MapRedTask; -import org.apache.hadoop.hive.ql.exec.tez.TezTask; -import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; -import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; -import org.apache.hadoop.hive.ql.lib.Dispatcher; -import org.apache.hadoop.hive.ql.lib.GraphWalker; -import org.apache.hadoop.hive.ql.lib.Node; -import org.apache.hadoop.hive.ql.lib.NodeProcessor; -import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; -import org.apache.hadoop.hive.ql.lib.Rule; -import org.apache.hadoop.hive.ql.lib.RuleRegExp; -import org.apache.hadoop.hive.ql.lib.TaskGraphWalker; -import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.plan.BaseWork; -import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; -import org.apache.hadoop.hive.ql.plan.MapJoinDesc; -import org.apache.hadoop.hive.ql.plan.MapWork; -import org.apache.hadoop.hive.ql.plan.MapredWork; -import org.apache.hadoop.hive.ql.plan.MergeJoinWork; -import org.apache.hadoop.hive.ql.plan.OperatorDesc; -import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc; -import org.apache.hadoop.hive.ql.plan.ReduceWork; -import org.apache.hadoop.hive.ql.plan.TableScanDesc; -import org.apache.hadoop.hive.ql.plan.TezWork; -import org.apache.hadoop.hive.ql.session.SessionState; - -/* - * Check each MapJoin and ShuffleJoin Operator to see they are performing a cross product. - * If yes, output a warning to the Session's console. - * The Checks made are the following: - * 1. MR, Shuffle Join: - * Check the parent ReduceSinkOp of the JoinOp. If its keys list is size = 0, then - * this is a cross product. - * The parent ReduceSinkOp is in the MapWork for the same Stage. - * 2. MR, MapJoin: - * If the keys expr list on the mapJoin Desc is an empty list for any input, - * this implies a cross product. - * 3. Tez, Shuffle Join: - * Check the parent ReduceSinkOp of the JoinOp. If its keys list is size = 0, then - * this is a cross product. - * The parent ReduceSinkOp checked is based on the ReduceWork.tagToInput map on the - * reduceWork that contains the JoinOp. - * 4. Tez, Map Join: - * If the keys expr list on the mapJoin Desc is an empty list for any input, - * this implies a cross product. 
- */ -public class CrossProductCheck implements PhysicalPlanResolver, Dispatcher { - - protected static transient final Logger LOG = LoggerFactory - .getLogger(CrossProductCheck.class); - - @Override - public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException { - TaskGraphWalker ogw = new TaskGraphWalker(this); - - ArrayList topNodes = new ArrayList(); - topNodes.addAll(pctx.getRootTasks()); - - ogw.startWalking(topNodes, null); - return pctx; - } - - @Override - public Object dispatch(Node nd, Stack stack, Object... nodeOutputs) - throws SemanticException { - @SuppressWarnings("unchecked") - Task currTask = (Task) nd; - if (currTask instanceof MapRedTask) { - MapRedTask mrTsk = (MapRedTask)currTask; - MapredWork mrWrk = mrTsk.getWork(); - checkMapJoins(mrTsk); - checkMRReducer(currTask.toString(), mrWrk); - } else if (currTask instanceof ConditionalTask ) { - List> taskListInConditionalTask = - ((ConditionalTask) currTask).getListTasks(); - for(Task tsk: taskListInConditionalTask){ - dispatch(tsk, stack, nodeOutputs); - } - - } else if (currTask instanceof TezTask) { - TezTask tzTask = (TezTask) currTask; - TezWork tzWrk = tzTask.getWork(); - checkMapJoins(tzWrk); - checkTezReducer(tzWrk); - } - return null; - } - - private void warn(String msg) { - SessionState.getConsole().printInfo("Warning: " + msg, false); - } - - private void checkMapJoins(MapRedTask mrTsk) throws SemanticException { - MapredWork mrWrk = mrTsk.getWork(); - MapWork mapWork = mrWrk.getMapWork(); - List warnings = new MapJoinCheck(mrTsk.toString()).analyze(mapWork); - if (!warnings.isEmpty()) { - for (String w : warnings) { - warn(w); - } - } - ReduceWork redWork = mrWrk.getReduceWork(); - if (redWork != null) { - warnings = new MapJoinCheck(mrTsk.toString()).analyze(redWork); - if (!warnings.isEmpty()) { - for (String w : warnings) { - warn(w); - } - } - } - } - - private void checkMapJoins(TezWork tzWrk) throws SemanticException { - for(BaseWork wrk : tzWrk.getAllWork() ) { - - if ( wrk instanceof MergeJoinWork ) { - wrk = ((MergeJoinWork)wrk).getMainWork(); - } - - List warnings = new MapJoinCheck(wrk.getName()).analyze(wrk); - if ( !warnings.isEmpty() ) { - for(String w : warnings) { - warn(w); - } - } - } - } - - private void checkTezReducer(TezWork tzWrk) throws SemanticException { - for(BaseWork wrk : tzWrk.getAllWork() ) { - - if ( wrk instanceof MergeJoinWork ) { - wrk = ((MergeJoinWork)wrk).getMainWork(); - } - - if ( !(wrk instanceof ReduceWork ) ) { - continue; - } - ReduceWork rWork = (ReduceWork) wrk; - Operator reducer = ((ReduceWork)wrk).getReducer(); - if ( reducer instanceof JoinOperator || reducer instanceof CommonMergeJoinOperator ) { - Map rsInfo = new TreeMap(); - for(Map.Entry e : rWork.getTagToInput().entrySet()) { - rsInfo.putAll(getReducerInfo(tzWrk, rWork.getName(), e.getValue())); - } - checkForCrossProduct(rWork.getName(), reducer, rsInfo); - } - } - } - - private void checkMRReducer(String taskName, MapredWork mrWrk) throws SemanticException { - ReduceWork rWrk = mrWrk.getReduceWork(); - if ( rWrk == null) { - return; - } - Operator reducer = rWrk.getReducer(); - if ( reducer instanceof JoinOperator|| reducer instanceof CommonMergeJoinOperator ) { - BaseWork prntWork = mrWrk.getMapWork(); - checkForCrossProduct(taskName, reducer, - new ExtractReduceSinkInfo(null).analyze(prntWork)); - } - } - - private void checkForCrossProduct(String taskName, - Operator reducer, - Map rsInfo) { - if ( rsInfo.isEmpty() ) { - return; - } - Iterator it = rsInfo.values().iterator(); - 
ExtractReduceSinkInfo.Info info = it.next(); - if (info.keyCols.size() == 0) { - List iAliases = new ArrayList(); - iAliases.addAll(info.inputAliases); - while (it.hasNext()) { - info = it.next(); - iAliases.addAll(info.inputAliases); - } - String warning = String.format( - "Shuffle Join %s[tables = %s] in Stage '%s' is a cross product", - reducer.toString(), - iAliases, - taskName); - warn(warning); - } - } - - private Map getReducerInfo(TezWork tzWrk, String vertex, String prntVertex) - throws SemanticException { - BaseWork prntWork = tzWrk.getWorkMap().get(prntVertex); - return new ExtractReduceSinkInfo(vertex).analyze(prntWork); - } - - /* - * Given a Work descriptor and the TaskName for the work - * this is responsible to check each MapJoinOp for cross products. - * The analyze call returns the warnings list. - *

- * For MR the taskname is the StageName, for Tez it is the vertex name. - */ - public static class MapJoinCheck implements NodeProcessor, NodeProcessorCtx { - - final List warnings; - final String taskName; - - MapJoinCheck(String taskName) { - this.taskName = taskName; - warnings = new ArrayList(); - } - - List analyze(BaseWork work) throws SemanticException { - Map opRules = new LinkedHashMap(); - opRules.put(new RuleRegExp("R1", MapJoinOperator.getOperatorName() - + "%"), this); - Dispatcher disp = new DefaultRuleDispatcher(new NoopProcessor(), opRules, this); - GraphWalker ogw = new DefaultGraphWalker(disp); - ArrayList topNodes = new ArrayList(); - topNodes.addAll(work.getAllRootOperators()); - ogw.startWalking(topNodes, null); - return warnings; - } - - @Override - public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, - Object... nodeOutputs) throws SemanticException { - @SuppressWarnings("unchecked") - AbstractMapJoinOperator mjOp = (AbstractMapJoinOperator) nd; - MapJoinDesc mjDesc = mjOp.getConf(); - - String bigTablAlias = mjDesc.getBigTableAlias(); - if ( bigTablAlias == null ) { - Operator parent = null; - for(Operator op : mjOp.getParentOperators() ) { - if ( op instanceof TableScanOperator ) { - parent = op; - } - } - if ( parent != null) { - TableScanDesc tDesc = ((TableScanOperator)parent).getConf(); - bigTablAlias = tDesc.getAlias(); - } - } - bigTablAlias = bigTablAlias == null ? "?" : bigTablAlias; - - List joinExprs = mjDesc.getKeys().values().iterator().next(); - - if ( joinExprs.size() == 0 ) { - warnings.add( - String.format("Map Join %s[bigTable=%s] in task '%s' is a cross product", - mjOp.toString(), bigTablAlias, taskName)); - } - - return null; - } - } - - /* - * for a given Work Descriptor, it extracts information about the ReduceSinkOps - * in the Work. For Tez, you can restrict it to ReduceSinks for a particular output - * vertex. - */ - public static class ExtractReduceSinkInfo implements NodeProcessor, NodeProcessorCtx { - - static class Info { - List keyCols; - List inputAliases; - - Info(List keyCols, List inputAliases) { - this.keyCols = keyCols; - this.inputAliases = inputAliases == null ? new ArrayList() : inputAliases; - } - - Info(List keyCols, String[] inputAliases) { - this.keyCols = keyCols; - this.inputAliases = inputAliases == null ? new ArrayList() : Arrays.asList(inputAliases); - } - } - - final String outputTaskName; - final Map reduceSinkInfo; - - ExtractReduceSinkInfo(String parentTaskName) { - this.outputTaskName = parentTaskName; - reduceSinkInfo = new HashMap(); - } - - Map analyze(BaseWork work) throws SemanticException { - Map opRules = new LinkedHashMap(); - opRules.put(new RuleRegExp("R1", ReduceSinkOperator.getOperatorName() - + "%"), this); - Dispatcher disp = new DefaultRuleDispatcher(new NoopProcessor(), opRules, this); - GraphWalker ogw = new DefaultGraphWalker(disp); - ArrayList topNodes = new ArrayList(); - topNodes.addAll(work.getAllRootOperators()); - ogw.startWalking(topNodes, null); - return reduceSinkInfo; - } - - @Override - public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, - Object... 
nodeOutputs) throws SemanticException { - ReduceSinkOperator rsOp = (ReduceSinkOperator) nd; - ReduceSinkDesc rsDesc = rsOp.getConf(); - - if ( outputTaskName != null ) { - String rOutputName = rsDesc.getOutputName(); - if ( rOutputName == null || !outputTaskName.equals(rOutputName)) { - return null; - } - } - - reduceSinkInfo.put(rsDesc.getTag(), - new Info(rsDesc.getKeyCols(), rsOp.getInputAliases())); - - return null; - } - } - - static class NoopProcessor implements NodeProcessor { - @Override - public final Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, - Object... nodeOutputs) throws SemanticException { - return nd; - } - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CrossProductHandler.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CrossProductHandler.java new file mode 100644 index 0000000..1442378 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CrossProductHandler.java @@ -0,0 +1,382 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
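The replacement file beginning here, CrossProductHandler.java, keeps the old checker's warnings but also acts on them: when hive.tez.cartesian-product.enabled is set and the shuffle join has no outer side, parent edges of type CUSTOM_SIMPLE_EDGE or CUSTOM_EDGE are rewritten to XPROD_EDGE and the reducer's task counts are reset to -1 so the CartesianProductVertexManager can pick the parallelism. A condensed, hypothetical sketch of that decision (enum and method names invented for illustration; the real code operates on TezWork/ReduceWork):

    // Hypothetical condensation of CrossProductHandler.checkTezReducer's rewrite.
    enum SketchEdgeType { SIMPLE_EDGE, CUSTOM_EDGE, CUSTOM_SIMPLE_EDGE, XPROD_EDGE }

    class XprodRewriteSketch {
      static SketchEdgeType maybeRewrite(SketchEdgeType current, boolean featureEnabled,
          boolean isCrossProduct, boolean noOuterJoin) {
        boolean eligible = current == SketchEdgeType.CUSTOM_EDGE
            || current == SketchEdgeType.CUSTOM_SIMPLE_EDGE;
        if (featureEnabled && isCrossProduct && noOuterJoin && eligible) {
          // The caller also unsets reducer parallelism (-1) so the
          // CartesianProductVertexManager can decide it at runtime.
          return SketchEdgeType.XPROD_EDGE;
        }
        return current;
      }
    }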
+ */ + +package org.apache.hadoop.hive.ql.optimizer.physical; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Stack; +import java.util.TreeMap; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.plan.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator; +import org.apache.hadoop.hive.ql.exec.ConditionalTask; +import org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator; +import org.apache.hadoop.hive.ql.exec.JoinOperator; +import org.apache.hadoop.hive.ql.exec.MapJoinOperator; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; +import org.apache.hadoop.hive.ql.exec.TableScanOperator; +import org.apache.hadoop.hive.ql.exec.Task; +import org.apache.hadoop.hive.ql.exec.mr.MapRedTask; +import org.apache.hadoop.hive.ql.exec.tez.TezTask; +import org.apache.hadoop.hive.ql.plan.TezEdgeProperty.EdgeType; +import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; +import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; +import org.apache.hadoop.hive.ql.lib.Dispatcher; +import org.apache.hadoop.hive.ql.lib.GraphWalker; +import org.apache.hadoop.hive.ql.lib.Node; +import org.apache.hadoop.hive.ql.lib.NodeProcessor; +import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import org.apache.hadoop.hive.ql.lib.Rule; +import org.apache.hadoop.hive.ql.lib.RuleRegExp; +import org.apache.hadoop.hive.ql.lib.TaskGraphWalker; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.session.SessionState; + +/* + * Check each MapJoin and ShuffleJoin Operator to see they are performing a cross product. + * If yes, output a warning to the Session's console. + * The Checks made are the following: + * 1. MR, Shuffle Join: + * Check the parent ReduceSinkOp of the JoinOp. If its keys list is size = 0, then + * this is a cross product. + * The parent ReduceSinkOp is in the MapWork for the same Stage. + * 2. MR, MapJoin: + * If the keys expr list on the mapJoin Desc is an empty list for any input, + * this implies a cross product. + * 3. Tez, Shuffle Join: + * Check the parent ReduceSinkOp of the JoinOp. If its keys list is size = 0, then + * this is a cross product. + * The parent ReduceSinkOp checked is based on the ReduceWork.tagToInput map on the + * reduceWork that contains the JoinOp. + * 4. Tez, Map Join: + * If the keys expr list on the mapJoin Desc is an empty list for any input, + * this implies a cross product. + */ +public class CrossProductHandler implements PhysicalPlanResolver, Dispatcher { + + protected static transient final Logger LOG = LoggerFactory + .getLogger(CrossProductHandler.class); + private Boolean cartesianProductEdgeEnabled = null; + + @Override + public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException { + cartesianProductEdgeEnabled = + HiveConf.getBoolVar(pctx.getConf(), HiveConf.ConfVars.TEZ_CARTESIAN_PRODUCT_EDGE_ENABLED); + TaskGraphWalker ogw = new TaskGraphWalker(this); + + ArrayList topNodes = new ArrayList(); + topNodes.addAll(pctx.getRootTasks()); + + ogw.startWalking(topNodes, null); + return pctx; + } + + @Override + public Object dispatch(Node nd, Stack stack, Object... 
nodeOutputs) + throws SemanticException { + @SuppressWarnings("unchecked") + Task currTask = (Task) nd; + if (currTask instanceof MapRedTask) { + MapRedTask mrTsk = (MapRedTask)currTask; + MapredWork mrWrk = mrTsk.getWork(); + checkMapJoins(mrTsk); + checkMRReducer(currTask.toString(), mrWrk); + } else if (currTask instanceof ConditionalTask ) { + List> taskListInConditionalTask = + ((ConditionalTask) currTask).getListTasks(); + for(Task tsk: taskListInConditionalTask){ + dispatch(tsk, stack, nodeOutputs); + } + + } else if (currTask instanceof TezTask) { + TezTask tezTask = (TezTask) currTask; + TezWork tezWork = tezTask.getWork(); + checkMapJoins(tezWork); + checkTezReducer(tezWork); + } + return null; + } + + private void warn(String msg) { + SessionState.getConsole().printInfo("Warning: " + msg, false); + } + + private void checkMapJoins(MapRedTask mrTsk) throws SemanticException { + MapredWork mrWrk = mrTsk.getWork(); + MapWork mapWork = mrWrk.getMapWork(); + List warnings = new MapJoinCheck(mrTsk.toString()).analyze(mapWork); + if (!warnings.isEmpty()) { + for (String w : warnings) { + warn(w); + } + } + ReduceWork redWork = mrWrk.getReduceWork(); + if (redWork != null) { + warnings = new MapJoinCheck(mrTsk.toString()).analyze(redWork); + if (!warnings.isEmpty()) { + for (String w : warnings) { + warn(w); + } + } + } + } + + private void checkMapJoins(TezWork tezWork) throws SemanticException { + for(BaseWork wrk : tezWork.getAllWork() ) { + + if ( wrk instanceof MergeJoinWork ) { + wrk = ((MergeJoinWork)wrk).getMainWork(); + } + + List warnings = new MapJoinCheck(wrk.getName()).analyze(wrk); + if ( !warnings.isEmpty() ) { + for(String w : warnings) { + warn(w); + } + } + } + } + + private void checkTezReducer(TezWork tezWork) throws SemanticException { + for(BaseWork wrk : tezWork.getAllWork() ) { + BaseWork origWrk = null; + + if ( wrk instanceof MergeJoinWork ) { + origWrk = wrk; + wrk = ((MergeJoinWork)wrk).getMainWork(); + } + + if ( !(wrk instanceof ReduceWork ) ) { + continue; + } + ReduceWork rWork = (ReduceWork) wrk; + Operator reducer = ((ReduceWork)wrk).getReducer(); + if ( reducer instanceof JoinOperator || reducer instanceof CommonMergeJoinOperator ) { + boolean noOuterJoin = ((JoinDesc)reducer.getConf()).isNoOuterJoin(); + Map rsInfo = new TreeMap(); + for(Map.Entry e : rWork.getTagToInput().entrySet()) { + rsInfo.putAll(getReducerInfo(tezWork, rWork.getName(), e.getValue())); + } + if (checkForCrossProduct(rWork.getName(), reducer, rsInfo) + && cartesianProductEdgeEnabled && noOuterJoin) { + List parents = tezWork.getParents(null == origWrk ? wrk : origWrk); + for (BaseWork p: parents) { + TezEdgeProperty prop = tezWork.getEdgeProperty(p, null == origWrk ? 
wrk : origWrk); + LOG.info("Edge Type: "+prop.getEdgeType()); + if (prop.getEdgeType().equals(EdgeType.CUSTOM_SIMPLE_EDGE) + || prop.getEdgeType().equals(EdgeType.CUSTOM_EDGE)) { + prop.setEdgeType(EdgeType.XPROD_EDGE); + rWork.setNumReduceTasks(-1); + rWork.setMaxReduceTasks(-1); + rWork.setMinReduceTasks(-1); + } + } + } + } + } + } + + private void checkMRReducer(String taskName, MapredWork mrWrk) throws SemanticException { + ReduceWork rWrk = mrWrk.getReduceWork(); + if ( rWrk == null) { + return; + } + Operator reducer = rWrk.getReducer(); + if ( reducer instanceof JoinOperator|| reducer instanceof CommonMergeJoinOperator ) { + BaseWork parentWork = mrWrk.getMapWork(); + checkForCrossProduct(taskName, reducer, + new ExtractReduceSinkInfo(null).analyze(parentWork)); + } + } + + private boolean checkForCrossProduct(String taskName, + Operator reducer, + Map rsInfo) { + if ( rsInfo.isEmpty() ) { + return false; + } + Iterator it = rsInfo.values().iterator(); + ExtractReduceSinkInfo.Info info = it.next(); + if (info.keyCols.size() == 0) { + List iAliases = new ArrayList(); + iAliases.addAll(info.inputAliases); + while (it.hasNext()) { + info = it.next(); + iAliases.addAll(info.inputAliases); + } + String warning = String.format( + "Shuffle Join %s[tables = %s] in Stage '%s' is a cross product", + reducer.toString(), + iAliases, + taskName); + warn(warning); + return true; + } + return false; + } + + private Map getReducerInfo(TezWork tezWork, String vertex, String prntVertex) + throws SemanticException { + BaseWork parentWork = tezWork.getWorkMap().get(prntVertex); + return new ExtractReduceSinkInfo(vertex).analyze(parentWork); + } + + /* + * Given a Work descriptor and the TaskName for the work + * this is responsible to check each MapJoinOp for cross products. + * The analyze call returns the warnings list. + *

+ * For MR the taskname is the StageName, for Tez it is the vertex name. + */ + public static class MapJoinCheck implements NodeProcessor, NodeProcessorCtx { + + final List warnings; + final String taskName; + + MapJoinCheck(String taskName) { + this.taskName = taskName; + warnings = new ArrayList(); + } + + List analyze(BaseWork work) throws SemanticException { + Map opRules = new LinkedHashMap(); + opRules.put(new RuleRegExp("R1", MapJoinOperator.getOperatorName() + + "%"), this); + Dispatcher disp = new DefaultRuleDispatcher(new NoopProcessor(), opRules, this); + GraphWalker ogw = new DefaultGraphWalker(disp); + ArrayList topNodes = new ArrayList(); + topNodes.addAll(work.getAllRootOperators()); + ogw.startWalking(topNodes, null); + return warnings; + } + + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + @SuppressWarnings("unchecked") + AbstractMapJoinOperator mjOp = (AbstractMapJoinOperator) nd; + MapJoinDesc mjDesc = mjOp.getConf(); + + String bigTablAlias = mjDesc.getBigTableAlias(); + if ( bigTablAlias == null ) { + Operator parent = null; + for(Operator op : mjOp.getParentOperators() ) { + if ( op instanceof TableScanOperator ) { + parent = op; + } + } + if ( parent != null) { + TableScanDesc tDesc = ((TableScanOperator)parent).getConf(); + bigTablAlias = tDesc.getAlias(); + } + } + bigTablAlias = bigTablAlias == null ? "?" : bigTablAlias; + + List joinExprs = mjDesc.getKeys().values().iterator().next(); + + if ( joinExprs.size() == 0 ) { + warnings.add( + String.format("Map Join %s[bigTable=%s] in task '%s' is a cross product", + mjOp.toString(), bigTablAlias, taskName)); + } + + return null; + } + } + + /* + * for a given Work Descriptor, it extracts information about the ReduceSinkOps + * in the Work. For Tez, you can restrict it to ReduceSinks for a particular output + * vertex. + */ + public static class ExtractReduceSinkInfo implements NodeProcessor, NodeProcessorCtx { + + static class Info { + List keyCols; + List inputAliases; + + Info(List keyCols, List inputAliases) { + this.keyCols = keyCols; + this.inputAliases = inputAliases == null ? new ArrayList() : inputAliases; + } + + Info(List keyCols, String[] inputAliases) { + this.keyCols = keyCols; + this.inputAliases = inputAliases == null ? new ArrayList() : Arrays.asList(inputAliases); + } + } + + final String outputTaskName; + final Map reduceSinkInfo; + + ExtractReduceSinkInfo(String parentTaskName) { + this.outputTaskName = parentTaskName; + reduceSinkInfo = new HashMap(); + } + + Map analyze(BaseWork work) throws SemanticException { + Map opRules = new LinkedHashMap(); + opRules.put(new RuleRegExp("R1", ReduceSinkOperator.getOperatorName() + + "%"), this); + Dispatcher disp = new DefaultRuleDispatcher(new NoopProcessor(), opRules, this); + GraphWalker ogw = new DefaultGraphWalker(disp); + ArrayList topNodes = new ArrayList(); + topNodes.addAll(work.getAllRootOperators()); + ogw.startWalking(topNodes, null); + return reduceSinkInfo; + } + + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... 
nodeOutputs) throws SemanticException { + ReduceSinkOperator rsOp = (ReduceSinkOperator) nd; + ReduceSinkDesc rsDesc = rsOp.getConf(); + + if ( outputTaskName != null ) { + String rOutputName = rsDesc.getOutputName(); + if ( rOutputName == null || !outputTaskName.equals(rOutputName)) { + return null; + } + } + + reduceSinkInfo.put(rsDesc.getTag(), + new Info(rsDesc.getKeyCols(), rsOp.getInputAliases())); + + return null; + } + } + + static class NoopProcessor implements NodeProcessor { + @Override + public final Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + return nd; + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java index 9377563..c040406 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java @@ -82,7 +82,7 @@ private void initialize(HiveConf hiveConf) { } if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_CHECK_CROSS_PRODUCT)) { - resolvers.add(new CrossProductCheck()); + resolvers.add(new CrossProductHandler()); } // Vectorization should be the last optimization, because it doesn't modify the plan diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SparkCrossProductCheck.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SparkCrossProductCheck.java index 7f3b1b3..9f14c66 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SparkCrossProductCheck.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SparkCrossProductCheck.java @@ -92,9 +92,9 @@ private void checkShuffleJoin(SparkWork sparkWork) throws SemanticException { for (ReduceWork reduceWork : sparkWork.getAllReduceWork()) { Operator reducer = reduceWork.getReducer(); if (reducer instanceof JoinOperator || reducer instanceof CommonMergeJoinOperator) { - Map rsInfo = new TreeMap(); + Map rsInfo = new TreeMap(); for (BaseWork parent : sparkWork.getParents(reduceWork)) { - rsInfo.putAll(new CrossProductCheck.ExtractReduceSinkInfo(null).analyze(parent)); + rsInfo.putAll(new CrossProductHandler.ExtractReduceSinkInfo(null).analyze(parent)); } checkForCrossProduct(reduceWork.getName(), reducer, rsInfo); } @@ -105,7 +105,7 @@ private void checkMapJoin(SparkTask sparkTask) throws SemanticException { SparkWork sparkWork = sparkTask.getWork(); for (BaseWork baseWork : sparkWork.getAllWork()) { List warnings = - new CrossProductCheck.MapJoinCheck(sparkTask.toString()).analyze(baseWork); + new CrossProductHandler.MapJoinCheck(sparkTask.toString()).analyze(baseWork); for (String w : warnings) { warn(w); } @@ -114,12 +114,12 @@ private void checkMapJoin(SparkTask sparkTask) throws SemanticException { private void checkForCrossProduct(String workName, Operator reducer, - Map rsInfo) { + Map rsInfo) { if (rsInfo.isEmpty()) { return; } - Iterator it = rsInfo.values().iterator(); - CrossProductCheck.ExtractReduceSinkInfo.Info info = it.next(); + Iterator it = rsInfo.values().iterator(); + CrossProductHandler.ExtractReduceSinkInfo.Info info = it.next(); if (info.keyCols.size() == 0) { List iAliases = new ArrayList(); iAliases.addAll(info.inputAliases); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java index 15836ec..da30c3b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java +++ 
ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java @@ -80,7 +80,7 @@ import org.apache.hadoop.hive.ql.optimizer.correlation.ReduceSinkJoinDeDuplication; import org.apache.hadoop.hive.ql.optimizer.metainfo.annotation.AnnotateWithOpTraits; import org.apache.hadoop.hive.ql.optimizer.physical.AnnotateRunTimeStatsOptimizer; -import org.apache.hadoop.hive.ql.optimizer.physical.CrossProductCheck; +import org.apache.hadoop.hive.ql.optimizer.physical.CrossProductHandler; import org.apache.hadoop.hive.ql.optimizer.physical.LlapClusterStateForCompile; import org.apache.hadoop.hive.ql.optimizer.physical.LlapDecider; import org.apache.hadoop.hive.ql.optimizer.physical.LlapPreVectorizationPass; @@ -658,7 +658,7 @@ protected void optimizeTaskPlan(List> rootTasks, Pa } if (conf.getBoolVar(HiveConf.ConfVars.HIVE_CHECK_CROSS_PRODUCT)) { - physicalCtx = new CrossProductCheck().resolve(physicalCtx); + physicalCtx = new CrossProductHandler().resolve(physicalCtx); } else { LOG.debug("Skipping cross product analysis"); } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/TezEdgeProperty.java ql/src/java/org/apache/hadoop/hive/ql/plan/TezEdgeProperty.java index bbed9be..d43b81a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/TezEdgeProperty.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/TezEdgeProperty.java @@ -28,7 +28,8 @@ CONTAINS,//used for union (all?) CUSTOM_EDGE,//CO_PARTITION_EDGE CUSTOM_SIMPLE_EDGE,//PARTITION_EDGE - ONE_TO_ONE_EDGE + ONE_TO_ONE_EDGE, + XPROD_EDGE } private HiveConf hiveConf; @@ -107,4 +108,5 @@ public void setSlowStart(boolean slowStart) { public void setEdgeType(EdgeType type) { this.edgeType = type; } + } diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezTask.java ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezTask.java index 2dc334d..47aa936 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezTask.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezTask.java @@ -109,8 +109,8 @@ public Vertex answer(InvocationOnMock invocation) throws Throwable { }); when(utils.createEdge(any(JobConf.class), any(Vertex.class), any(Vertex.class), - any(TezEdgeProperty.class), any(VertexType.class))).thenAnswer(new Answer() { - + any(TezEdgeProperty.class), any(BaseWork.class), any(TezWork.class))) + .thenAnswer(new Answer() { @Override public Edge answer(InvocationOnMock invocation) throws Throwable { Object[] args = invocation.getArguments(); diff --git ql/src/test/queries/clientpositive/cross_prod_1.q ql/src/test/queries/clientpositive/cross_prod_1.q new file mode 100644 index 0000000..b5a84ea --- /dev/null +++ ql/src/test/queries/clientpositive/cross_prod_1.q @@ -0,0 +1,34 @@ +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set hive.tez.cartesian-product.enabled=true; + +create table X as +select distinct * from src order by key limit 10; + +explain select * from X as A, X as B order by A.key, B.key; +select * from X as A, X as B order by A.key, B.key; + +explain select * from X as A join X as B on A.key 'Brand#14' AND p_size NOT IN (select select * from part where p_brand <> 'Brand#14' AND p_size NOT IN (select (p_size*p_size) from part p where p.p_type = part.p_type ) AND p_size <> 340; --lhs contains non-simple expression -explain select * from part where (p_size-1) NOT IN (select min(p_size) from part group by p_type); -select * from part where (p_size-1) NOT IN (select min(p_size) from part group by p_type); +explain select * from part where (p_size-1) NOT IN (select min(p_size) from part group by p_type) order 
by p_partkey; +select * from part where (p_size-1) NOT IN (select min(p_size) from part group by p_type) order by p_partkey; explain select * from part where (p_partkey*p_size) NOT IN (select min(p_partkey) from part group by p_type); select * from part where (p_partkey*p_size) NOT IN (select min(p_partkey) from part group by p_type); diff --git ql/src/test/queries/clientpositive/subquery_select.q ql/src/test/queries/clientpositive/subquery_select.q index 15377a4..c1766ff 100644 --- ql/src/test/queries/clientpositive/subquery_select.q +++ ql/src/test/queries/clientpositive/subquery_select.q @@ -155,8 +155,8 @@ SELECT p_size, (SELECT count(p_size) FROM part p WHERE p.p_type = part.p_type) IS NULL from part; -- scalar, non-corr, non agg -explain select p_type, (select p_size from part order by p_size limit 1) = 1 from part; -select p_type, (select p_size from part order by p_size limit 1) = 1 from part; +explain select p_type, (select p_size from part order by p_size limit 1) = 1 from part order by p_type; +select p_type, (select p_size from part order by p_size limit 1) = 1 from part order by p_type; -- in corr, multiple EXPLAIN SELECT p_size, p_size IN ( diff --git ql/src/test/results/clientpositive/llap/auto_join0.q.out ql/src/test/results/clientpositive/llap/auto_join0.q.out index 7f0a878..29945ad 100644 --- ql/src/test/results/clientpositive/llap/auto_join0.q.out +++ ql/src/test/results/clientpositive/llap/auto_join0.q.out @@ -1,4 +1,4 @@ -Warning: Map Join MAPJOIN[22][bigTable=?] in task 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[22][tables = [src1, src2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select sum(hash(a.k1,a.v1,a.k2, a.v2)) from ( @@ -30,9 +30,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (BROADCAST_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (XPROD_EDGE), Reducer 5 (XPROD_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -64,28 +65,33 @@ STAGE PLANS: expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 1 Reducer 4 - Statistics: Num rows: 27556 Data size: 9809936 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col0,_col1,_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Reduce Output Operator + sort order: + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string) Reducer 3 Execution mode: llap Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 27556 Data size: 9809936 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(hash(_col0,_col1,_col2,_col3)) + mode: 
hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) mode: mergepartial @@ -98,7 +104,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reducer 5 Execution mode: llap Reduce Operator Tree: Select Operator @@ -116,7 +122,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[22][bigTable=?] in task 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[22][tables = [src1, src2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: select sum(hash(a.k1,a.v1,a.k2, a.v2)) from ( SELECT src1.key as k1, src1.value as v1, diff --git ql/src/test/results/clientpositive/llap/auto_join_filters.q.out ql/src/test/results/clientpositive/llap/auto_join_filters.q.out index d1d9408..079f047 100644 --- ql/src/test/results/clientpositive/llap/auto_join_filters.q.out +++ ql/src/test/results/clientpositive/llap/auto_join_filters.q.out @@ -14,7 +14,7 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in3.txt' INTO TABLE my POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@myinput1 -Warning: Map Join MAPJOIN[18][bigTable=?] in task 'Map 1' is a cross product +Warning: Shuffle Join MERGEJOIN[18][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 @@ -300,7 +300,7 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in2.txt' into table sm POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_input2 -Warning: Map Join MAPJOIN[18][bigTable=?] in task 'Map 1' is a cross product +Warning: Shuffle Join MERGEJOIN[18][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 diff --git ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out index 5984e8f..04da1f2 100644 --- ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out +++ ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out @@ -14,7 +14,7 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in1.txt' INTO TABLE my POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@myinput1 -Warning: Map Join MAPJOIN[14][bigTable=?] 
in task 'Map 1' is a cross product +Warning: Shuffle Join MERGEJOIN[14][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 diff --git ql/src/test/results/clientpositive/llap/auto_sortmerge_join_12.q.out ql/src/test/results/clientpositive/llap/auto_sortmerge_join_12.q.out index 6ef1f34..3acbb20 100644 --- ql/src/test/results/clientpositive/llap/auto_sortmerge_join_12.q.out +++ ql/src/test/results/clientpositive/llap/auto_sortmerge_join_12.q.out @@ -134,7 +134,7 @@ POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket3out POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_medium@ds=2008-04-08 -Warning: Map Join MAPJOIN[34][bigTable=?] in task 'Map 3' is a cross product +Warning: Shuffle Join MERGEJOIN[34][tables = [$hdt$_1, $hdt$_2, $hdt$_0, $hdt$_3]] in Stage 'Reducer 4' is a cross product PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key @@ -148,8 +148,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 3 <- Map 1 (BROADCAST_EDGE), Map 2 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE) - Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) + Map 3 <- Map 1 (BROADCAST_EDGE), Map 2 (BROADCAST_EDGE) + Reducer 4 <- Map 3 (XPROD_EDGE), Map 6 (XPROD_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -336,29 +337,12 @@ STAGE PLANS: 1 Map 2 Position of Big Table: 2 Statistics: Num rows: 244 Data size: 43381 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Estimated key counts: Map 5 => 1 - keys: - 0 - 1 - input vertices: - 1 Map 5 - Position of Big Table: 0 - Statistics: Num rows: 244 Data size: 45577 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 244 Data size: 43381 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: false Execution mode: llap LLAP IO: no inputs Path -> Alias: @@ -465,7 +449,7 @@ STAGE PLANS: Truncated Path -> Alias: /bucket_big/ds=2008-04-08 [c] /bucket_big/ds=2008-04-09 [c] - Map 5 + Map 6 Map Operator Tree: TableScan alias: d @@ -539,6 +523,30 @@ STAGE PLANS: Execution mode: llap Needs Tagging: false Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + Position of Big Table: 0 + Statistics: Num rows: 244 Data size: 45577 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data 
size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 5 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial @@ -573,7 +581,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[34][bigTable=?] in task 'Map 3' is a cross product +Warning: Shuffle Join MERGEJOIN[34][tables = [$hdt$_1, $hdt$_2, $hdt$_0, $hdt$_3]] in Stage 'Reducer 4' is a cross product PREHOOK: query: select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key PREHOOK: type: QUERY PREHOOK: Input: default@bucket_big diff --git ql/src/test/results/clientpositive/llap/cross_join.q.out ql/src/test/results/clientpositive/llap/cross_join.q.out index 9d664af..6bde893 100644 --- ql/src/test/results/clientpositive/llap/cross_join.q.out +++ ql/src/test/results/clientpositive/llap/cross_join.q.out @@ -12,7 +12,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (XPROD_EDGE), Map 3 (XPROD_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -81,7 +81,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (XPROD_EDGE), Map 3 (XPROD_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -215,7 +215,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 1' is a cross product +Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select src.key from src join src src2 PREHOOK: type: QUERY POSTHOOK: query: explain select src.key from src join src src2 @@ -229,7 +229,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (XPROD_EDGE), Map 3 (XPROD_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -241,26 +241,13 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0 - input vertices: - 1 Map 2 - Statistics: Num rows: 250000 Data size: 21750000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 250000 Data size: 21750000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Map 2 + Map 3 Map Operator Tree: TableScan alias: src2 @@ -272,6 +259,24 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0 + Statistics: Num rows: 250000 Data size: 21750000 Basic stats: 
COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250000 Data size: 21750000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -279,7 +284,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 1' is a cross product +Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select src.key from src cross join src src2 PREHOOK: type: QUERY POSTHOOK: query: explain select src.key from src cross join src src2 @@ -293,7 +298,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (XPROD_EDGE), Map 3 (XPROD_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -305,26 +310,13 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0 - input vertices: - 1 Map 2 - Statistics: Num rows: 250000 Data size: 21750000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 250000 Data size: 21750000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Map 2 + Map 3 Map Operator Tree: TableScan alias: src2 @@ -336,6 +328,24 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0 + Statistics: Num rows: 250000 Data size: 21750000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250000 Data size: 21750000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/cross_prod_1.q.out ql/src/test/results/clientpositive/llap/cross_prod_1.q.out new file mode 100644 index 0000000..fd03fe5 --- /dev/null +++ ql/src/test/results/clientpositive/llap/cross_prod_1.q.out @@ -0,0 +1,2502 @@ +PREHOOK: query: create table X as +select distinct * from src order by key limit 10 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@X +POSTHOOK: query: create table X as +select distinct * from src order by key limit 10 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@X 
+POSTHOOK: Lineage: x.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: x.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +Warning: Shuffle Join MERGEJOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: explain select * from X as A, X as B order by A.key, B.key +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from X as A, X as B order by A.key, B.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (XPROD_EDGE), Map 4 (XPROD_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 100 Data size: 73700 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col2 (type: string) + sort order: ++ + Statistics: Num rows: 100 Data size: 73700 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col3 (type: string) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 100 Data size: 73700 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 73700 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join MERGEJOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: select * from X as A, X as B order by A.key, B.key +PREHOOK: type: QUERY +PREHOOK: Input: default@x +#### A masked pattern was here #### +POSTHOOK: query: select * from X as A, X as B order by A.key, B.key 
+POSTHOOK: type: QUERY +POSTHOOK: Input: default@x +#### A masked pattern was here #### +0 val_0 0 val_0 +0 val_0 10 val_10 +0 val_0 100 val_100 +0 val_0 103 val_103 +0 val_0 104 val_104 +0 val_0 105 val_105 +0 val_0 11 val_11 +0 val_0 111 val_111 +0 val_0 113 val_113 +0 val_0 114 val_114 +10 val_10 0 val_0 +10 val_10 10 val_10 +10 val_10 100 val_100 +10 val_10 103 val_103 +10 val_10 104 val_104 +10 val_10 105 val_105 +10 val_10 11 val_11 +10 val_10 111 val_111 +10 val_10 113 val_113 +10 val_10 114 val_114 +100 val_100 0 val_0 +100 val_100 10 val_10 +100 val_100 100 val_100 +100 val_100 103 val_103 +100 val_100 104 val_104 +100 val_100 105 val_105 +100 val_100 11 val_11 +100 val_100 111 val_111 +100 val_100 113 val_113 +100 val_100 114 val_114 +103 val_103 0 val_0 +103 val_103 10 val_10 +103 val_103 100 val_100 +103 val_103 103 val_103 +103 val_103 104 val_104 +103 val_103 105 val_105 +103 val_103 11 val_11 +103 val_103 111 val_111 +103 val_103 113 val_113 +103 val_103 114 val_114 +104 val_104 0 val_0 +104 val_104 10 val_10 +104 val_104 100 val_100 +104 val_104 103 val_103 +104 val_104 104 val_104 +104 val_104 105 val_105 +104 val_104 11 val_11 +104 val_104 111 val_111 +104 val_104 113 val_113 +104 val_104 114 val_114 +105 val_105 0 val_0 +105 val_105 10 val_10 +105 val_105 100 val_100 +105 val_105 103 val_103 +105 val_105 104 val_104 +105 val_105 105 val_105 +105 val_105 11 val_11 +105 val_105 111 val_111 +105 val_105 113 val_113 +105 val_105 114 val_114 +11 val_11 0 val_0 +11 val_11 10 val_10 +11 val_11 100 val_100 +11 val_11 103 val_103 +11 val_11 104 val_104 +11 val_11 105 val_105 +11 val_11 11 val_11 +11 val_11 111 val_111 +11 val_11 113 val_113 +11 val_11 114 val_114 +111 val_111 0 val_0 +111 val_111 10 val_10 +111 val_111 100 val_100 +111 val_111 103 val_103 +111 val_111 104 val_104 +111 val_111 105 val_105 +111 val_111 11 val_11 +111 val_111 111 val_111 +111 val_111 113 val_113 +111 val_111 114 val_114 +113 val_113 0 val_0 +113 val_113 10 val_10 +113 val_113 100 val_100 +113 val_113 103 val_103 +113 val_113 104 val_104 +113 val_113 105 val_105 +113 val_113 11 val_11 +113 val_113 111 val_111 +113 val_113 113 val_113 +113 val_113 114 val_114 +114 val_114 0 val_0 +114 val_114 10 val_10 +114 val_114 100 val_100 +114 val_114 103 val_103 +114 val_114 104 val_104 +114 val_114 105 val_105 +114 val_114 11 val_11 +114 val_114 111 val_111 +114 val_114 113 val_113 +114 val_114 114 val_114 +Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: explain select * from X as A join X as B on A.key 4:boolean) -> 5:String - Statistics: Num rows: 25 Data size: 425 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 25 Data size: 425 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) Execution mode: vectorized, llap LLAP 
IO: all inputs Map Vectorization: @@ -140,10 +115,10 @@ STAGE PLANS: enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: true + allNative: true + usesVectorUDFAdaptor: false vectorized: true - Map 2 + Map 3 Map Operator Tree: TableScan alias: tsint @@ -177,6 +152,28 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 25 Data size: 425 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: int), _col1 (type: int), _col3 (type: smallint), CASE WHEN (_col1 BETWEEN UDFToInteger(_col3) AND UDFToInteger(_col3)) THEN ('Ok') ELSE ('NoOk') END (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 25 Data size: 425 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 25 Data size: 425 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -184,7 +181,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 1' is a cross product +Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select tint.rnum, tsint.rnum, tint.cint, tsint.csint, (case when (tint.cint between tsint.csint and tsint.csint) then "Ok" else "NoOk" end) as between_col from tint , tsint PREHOOK: type: QUERY PREHOOK: Input: default@tint @@ -221,7 +218,7 @@ tint.rnum tsint.rnum tint.cint tsint.csint between_col 4 2 10 0 NoOk 4 3 10 1 NoOk 4 4 10 10 Ok -Warning: Map Join MAPJOIN[10][bigTable=?] 
in task 'Map 1' is a cross product +Warning: Shuffle Join MERGEJOIN[10][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain vectorization expression select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint PREHOOK: type: QUERY @@ -242,7 +239,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (XPROD_EDGE), Map 3 (XPROD_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -261,45 +258,14 @@ STAGE PLANS: native: true projectedOutputColumns: [0, 1] Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - Map Join Vectorization: - className: VectorMapJoinInnerMultiKeyOperator + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 1 Map 2 - Statistics: Num rows: 25 Data size: 425 Basic stats: COMPLETE Column stats: NONE - Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: SelectColumnIsTrue(col 4)(children: VectorUDFAdaptor(_col1 BETWEEN UDFToInteger(_col3) AND UDFToInteger(_col3))(children: col 3, col 3) -> 4:boolean) -> boolean - predicate: _col1 BETWEEN UDFToInteger(_col3) AND UDFToInteger(_col3) (type: boolean) - Statistics: Num rows: 2 Data size: 34 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col2 (type: int), _col1 (type: int), _col3 (type: smallint) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0, 2, 1, 3] - Statistics: Num rows: 2 Data size: 34 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 2 Data size: 34 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -307,10 +273,10 @@ STAGE PLANS: enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: true + allNative: true + usesVectorUDFAdaptor: false vectorized: true - Map 2 + Map 3 Map Operator Tree: TableScan alias: tsint @@ -344,6 +310,31 @@ STAGE PLANS: allNative: true 
usesVectorUDFAdaptor: false vectorized: true + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 25 Data size: 425 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col1 BETWEEN UDFToInteger(_col3) AND UDFToInteger(_col3) (type: boolean) + Statistics: Num rows: 2 Data size: 34 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: int), _col1 (type: int), _col3 (type: smallint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 34 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 34 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -351,7 +342,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[10][bigTable=?] in task 'Map 1' is a cross product +Warning: Shuffle Join MERGEJOIN[10][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint PREHOOK: type: QUERY PREHOOK: Input: default@tint diff --git ql/src/test/results/clientpositive/llap/vector_complex_all.q.out ql/src/test/results/clientpositive/llap/vector_complex_all.q.out index 2f3f886..2268a15 100644 --- ql/src/test/results/clientpositive/llap/vector_complex_all.q.out +++ ql/src/test/results/clientpositive/llap/vector_complex_all.q.out @@ -610,7 +610,7 @@ b str two line1 four line2 six line3 -Warning: Map Join MAPJOIN[15][bigTable=?] 
in task 'Map 4' is a cross product +Warning: Shuffle Join MERGEJOIN[15][tables = [$hdt$_1, $hdt$_2, $hdt$_3, $hdt$_0]] in Stage 'Reducer 2' is a cross product PREHOOK: query: EXPLAIN VECTORIZATION DETAIL INSERT INTO TABLE orc_create_complex SELECT orc_create_staging.*, src1.key FROM orc_create_staging cross join src src1 cross join orc_create_staging spam1 cross join orc_create_staging spam2 @@ -635,7 +635,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 4 <- Map 1 (BROADCAST_EDGE), Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (XPROD_EDGE), Map 3 (XPROD_EDGE), Map 4 (XPROD_EDGE), Map 5 (XPROD_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -678,7 +678,7 @@ STAGE PLANS: includeColumns: [0, 1, 2, 3] dataColumns: str:string, mp:map, lst:array, strct:struct partitionColumnCount: 0 - Map 2 + Map 3 Map Operator Tree: TableScan alias: spam2 @@ -715,7 +715,7 @@ STAGE PLANS: includeColumns: [] dataColumns: str:string, mp:map, lst:array, strct:struct partitionColumnCount: 0 - Map 3 + Map 4 Map Operator Tree: TableScan alias: spam1 @@ -752,7 +752,7 @@ STAGE PLANS: includeColumns: [] dataColumns: str:string, mp:map, lst:array, strct:struct partitionColumnCount: 0 - Map 4 + Map 5 Map Operator Tree: TableScan alias: src1 @@ -768,53 +768,23 @@ STAGE PLANS: native: true projectedOutputColumns: [0] Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - Inner Join 0 to 3 - keys: - 0 - 1 - 2 - 3 - Map Join Vectorization: - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: One MapJoin Condition IS false - outputColumnNames: _col0, _col1, _col2, _col3, _col6 - input vertices: - 0 Map 1 - 1 Map 2 - 2 Map 3 - Statistics: Num rows: 500 Data size: 1768000 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: map), _col2 (type: array), _col3 (type: struct), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0, 1, 2, 3, 4] - Statistics: Num rows: 500 Data size: 1768000 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 500 Data size: 1768000 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.orc_create_complex + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumns: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: [0] + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) 
Execution mode: vectorized, llap Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -822,7 +792,33 @@ STAGE PLANS: includeColumns: [0] dataColumns: key:string, value:string partitionColumnCount: 0 - scratchColumnTypeNames: string, map, array, struct + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + Inner Join 0 to 3 + keys: + 0 + 1 + 2 + 3 + outputColumnNames: _col0, _col1, _col2, _col3, _col6 + Statistics: Num rows: 500 Data size: 1768000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: map), _col2 (type: array), _col3 (type: struct), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 1768000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 1768000 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orc_create_complex Stage: Stage-2 Dependency Collection @@ -840,7 +836,7 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator -Warning: Map Join MAPJOIN[15][bigTable=?] in task 'Map 4' is a cross product +Warning: Shuffle Join MERGEJOIN[15][tables = [$hdt$_1, $hdt$_2, $hdt$_3, $hdt$_0]] in Stage 'Reducer 2' is a cross product PREHOOK: query: INSERT INTO TABLE orc_create_complex SELECT orc_create_staging.*, src1.key FROM orc_create_staging cross join src src1 cross join orc_create_staging spam1 cross join orc_create_staging spam2 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out index e43b4d1..e644f14 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out @@ -1,4 +1,4 @@ -Warning: Map Join MAPJOIN[27][bigTable=?] 
in task 'Map 1' is a cross product +Warning: Shuffle Join MERGEJOIN[27][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain vectorization expression select * from src @@ -26,10 +26,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Reducer 4 (BROADCAST_EDGE), Reducer 5 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 5 (XPROD_EDGE), Reducer 6 (BROADCAST_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -48,58 +48,14 @@ STAGE PLANS: native: true projectedOutputColumns: [0, 1] Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - Map Join Vectorization: - className: VectorMapJoinInnerMultiKeyOperator + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 1 Reducer 4 - Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Map Join Vectorization: - className: VectorMapJoinOuterStringOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - outputColumnNames: _col0, _col1, _col2, _col3, _col5 - input vertices: - 1 Reducer 5 - Statistics: Num rows: 500 Data size: 98620 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 2, val 0) -> boolean, FilterExprAndExpr(children: SelectColumnIsNull(col 4) -> boolean, SelectColumnIsNotNull(col 0) -> boolean, FilterLongColGreaterEqualLongColumn(col 3, col 2) -> boolean) -> boolean) -> boolean - predicate: ((_col2 = 0) or (_col5 is null and _col0 is not null and (_col3 >= _col2))) (type: boolean) - Statistics: Num rows: 500 Data size: 98620 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0, 1] - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT 
columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: @@ -110,7 +66,7 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Map 3 + Map 4 Map Operator Tree: TableScan alias: src @@ -180,6 +136,39 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col5 + input vertices: + 1 Reducer 6 + Statistics: Num rows: 500 Data size: 98620 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((_col2 = 0) or (_col5 is null and _col0 is not null and (_col3 >= _col2))) (type: boolean) + Statistics: Num rows: 500 Data size: 98620 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -207,7 +196,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reducer 5 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -238,7 +227,7 @@ STAGE PLANS: nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) - Reducer 5 + Reducer 6 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -287,7 +276,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[27][bigTable=?] 
in task 'Map 1' is a cross product +Warning: Shuffle Join MERGEJOIN[27][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select * from src where not key in @@ -316,7 +305,7 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@orcsrc POSTHOOK: Lineage: orcsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: orcsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -Warning: Map Join MAPJOIN[27][bigTable=?] in task 'Map 1' is a cross product +Warning: Shuffle Join MERGEJOIN[27][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select * from orcsrc where not key in @@ -333,7 +322,7 @@ order by key POSTHOOK: type: QUERY POSTHOOK: Input: default@orcsrc #### A masked pattern was here #### -Warning: Map Join MAPJOIN[27][bigTable=?] in task 'Map 1' is a cross product +Warning: Shuffle Join MERGEJOIN[27][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select * from orcsrc where not key in diff --git ql/src/test/results/clientpositive/llap/vector_include_no_sel.q.out ql/src/test/results/clientpositive/llap/vector_include_no_sel.q.out index 9eadbb6..a78a79b 100644 --- ql/src/test/results/clientpositive/llap/vector_include_no_sel.q.out +++ ql/src/test/results/clientpositive/llap/vector_include_no_sel.q.out @@ -162,7 +162,7 @@ POSTHOOK: Lineage: customer_demographics.cd_education_status SIMPLE [(customer_d POSTHOOK: Lineage: customer_demographics.cd_gender SIMPLE [(customer_demographics_txt)customer_demographics_txt.FieldSchema(name:cd_gender, type:string, comment:null), ] POSTHOOK: Lineage: customer_demographics.cd_marital_status SIMPLE [(customer_demographics_txt)customer_demographics_txt.FieldSchema(name:cd_marital_status, type:string, comment:null), ] POSTHOOK: Lineage: customer_demographics.cd_purchase_estimate SIMPLE [(customer_demographics_txt)customer_demographics_txt.FieldSchema(name:cd_purchase_estimate, type:int, comment:null), ] -Warning: Map Join MAPJOIN[13][bigTable=store_sales] in task 'Map 2' is a cross product +Warning: Shuffle Join MERGEJOIN[13][tables = [customer_demographics, store_sales]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain vectorization expression select count(1) from customer_demographics,store_sales where ((customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk and customer_demographics.cd_marital_status = 'M') or @@ -186,8 +186,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (XPROD_EDGE), Map 4 (XPROD_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -217,7 +217,7 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - Map 2 + Map 4 Map Operator Tree: TableScan alias: store_sales @@ -225,55 +225,15 @@ STAGE PLANS: TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - Map Join Vectorization: - className: VectorMapJoinInnerMultiKeyOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS 
true, Optimized Table and Supports Key Types IS true - outputColumnNames: _col0, _col2, _col16 - input vertices: - 0 Map 1 - Statistics: Num rows: 200000 Data size: 36868800 Basic stats: COMPLETE Column stats: NONE - Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongColEqualLongColumn(col 23, col 4) -> boolean, FilterStringGroupColEqualStringScalar(col 24, val M) -> boolean) -> boolean, FilterExprAndExpr(children: FilterLongColEqualLongColumn(col 23, col 4) -> boolean, FilterStringGroupColEqualStringScalar(col 24, val U) -> boolean) -> boolean) -> boolean - predicate: (((_col0 = _col16) and (_col2 = 'M')) or ((_col0 = _col16) and (_col2 = 'U'))) (type: boolean) - Statistics: Num rows: 100000 Data size: 18434400 Basic stats: COMPLETE Column stats: NONE - Select Operator - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [] - Statistics: Num rows: 100000 Data size: 18434400 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - Group By Vectorization: - aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 25:long) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - vectorOutput: true - native: false - vectorProcessingMode: HASH - projectedOutputColumns: [0] - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.vectorized.execution.reducesink.new.enabled IS false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.vectorized.execution.reducesink.new.enabled IS false + Statistics: Num rows: 1000 Data size: 3804 Basic stats: COMPLETE Column stats: NONE + value expressions: ss_cdemo_sk (type: int) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -284,6 +244,37 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col2, _col16 + Statistics: Num rows: 200000 Data size: 36868800 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col0 = _col16) and (_col2 = 'M')) or ((_col0 = _col16) and (_col2 = 'U'))) (type: boolean) + Statistics: Num rows: 100000 Data size: 18434400 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 100000 Data size: 18434400 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + Group By Vectorization: + groupByMode: HASH + vectorOutput: false + native: false + vectorProcessingMode: NONE + projectedOutputColumns: null + mode: hash + 
outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -324,7 +315,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[13][bigTable=store_sales] in task 'Map 2' is a cross product +Warning: Shuffle Join MERGEJOIN[13][tables = [customer_demographics, store_sales]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select count(1) from customer_demographics,store_sales where ((customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk and customer_demographics.cd_marital_status = 'M') or (customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk and customer_demographics.cd_marital_status = 'U')) diff --git ql/src/test/results/clientpositive/llap/vector_join_filters.q.out ql/src/test/results/clientpositive/llap/vector_join_filters.q.out index 1a492b6..4e5205f 100644 --- ql/src/test/results/clientpositive/llap/vector_join_filters.q.out +++ ql/src/test/results/clientpositive/llap/vector_join_filters.q.out @@ -26,7 +26,7 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@myinput1 POSTHOOK: Lineage: myinput1.key SIMPLE [(myinput1_txt)myinput1_txt.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: myinput1.value SIMPLE [(myinput1_txt)myinput1_txt.FieldSchema(name:value, type:int, comment:null), ] -Warning: Map Join MAPJOIN[18][bigTable=?] in task 'Map 1' is a cross product +Warning: Shuffle Join MERGEJOIN[18][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 diff --git ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out index 3497164..056360f 100644 --- ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out +++ ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out @@ -26,7 +26,7 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@myinput1 POSTHOOK: Lineage: myinput1.key SIMPLE [(myinput1_txt)myinput1_txt.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: myinput1.value SIMPLE [(myinput1_txt)myinput1_txt.FieldSchema(name:value, type:int, comment:null), ] -Warning: Map Join MAPJOIN[14][bigTable=?] 
in task 'Map 1' is a cross product +Warning: Shuffle Join MERGEJOIN[14][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 diff --git ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out index 16cae79..7a4fe36 100644 --- ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out @@ -2375,7 +2375,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 4 (XPROD_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) Reducer 4 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### @@ -2524,7 +2524,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (XPROD_EDGE), Map 4 (XPROD_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -5507,7 +5507,7 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 #### A masked pattern was here #### 1000 -Warning: Map Join MAPJOIN[22][bigTable=?] in task 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08' PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN VECTORIZATION select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08' @@ -5525,8 +5525,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 3 <- Map 1 (BROADCAST_EDGE), Map 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 4 (XPROD_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -5540,24 +5541,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - Map 2 - Map Operator Tree: - TableScan - alias: srcpart - filterExpr: (ds = '2008-04-08') (type: boolean) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: '2008-04-08' (type: string) mode: hash @@ -5578,6 +5561,25 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + Statistics: Num rows: 500000 Data size: 11124000 Basic 
stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -5589,30 +5591,17 @@ STAGE PLANS: vectorized: true Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string) + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - input vertices: - 0 Map 1 - Statistics: Num rows: 500000 Data size: 11124000 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: @@ -5624,17 +5613,15 @@ STAGE PLANS: vectorized: true Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -5642,7 +5629,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[22][bigTable=?] 
in task 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08' PREHOOK: type: QUERY PREHOOK: Input: default@srcpart diff --git ql/src/test/results/clientpositive/llap/vectorized_multi_output_select.q.out ql/src/test/results/clientpositive/llap/vectorized_multi_output_select.q.out index 2c66856..052fda6 100644 --- ql/src/test/results/clientpositive/llap/vectorized_multi_output_select.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_multi_output_select.q.out @@ -1,4 +1,4 @@ -Warning: Map Join MAPJOIN[43][bigTable=?] in task 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[43][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select * from ( select count(*) as h8_30_to_9 @@ -32,9 +32,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 4 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 3 (BROADCAST_EDGE) - Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Map 5 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (XPROD_EDGE), Reducer 4 (XPROD_EDGE) + Reducer 4 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -56,7 +57,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) input vertices: - 1 Map 4 + 1 Map 5 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() @@ -74,7 +75,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) input vertices: - 1 Map 5 + 1 Map 6 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() @@ -87,7 +88,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: src1 @@ -106,7 +107,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 177 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 5 + Map 6 Map Operator Tree: TableScan alias: src1 @@ -133,24 +134,29 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1 - input vertices: - 1 Reducer 3 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + 
Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -169,7 +175,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[43][bigTable=?] in task 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[43][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product PREHOOK: query: select * from ( select count(*) as h8_30_to_9 from src diff --git ql/src/test/results/clientpositive/spark/subquery_multi.q.out ql/src/test/results/clientpositive/spark/subquery_multi.q.out index ff9b921..8a2b9b3 100644 --- ql/src/test/results/clientpositive/spark/subquery_multi.q.out +++ ql/src/test/results/clientpositive/spark/subquery_multi.q.out @@ -234,8 +234,8 @@ POSTHOOK: Input: default@part_null 17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the 45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful 48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i -78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith 78487 NULL Manufacturer#6 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith +78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith 192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir Warning: Shuffle Join JOIN[27][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 3' is a cross product PREHOOK: query: explain select * from part_null where p_name IN (select p_name from part_null) AND p_brand NOT IN (select p_name from part_null) @@ -637,40 +637,40 @@ STAGE PLANS: ListSink Warning: Shuffle Join JOIN[27][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 3' is a cross product -PREHOOK: query: select * from part_null where p_name IN (select p_name from part_null) AND p_brand NOT IN (select p_type from part_null) +PREHOOK: query: select * from part_null where p_name IN (select p_name from part_null) AND p_brand NOT IN (select p_type from part_null) order by part_null.p_partkey PREHOOK: type: QUERY PREHOOK: Input: default@part_null #### A masked pattern was here #### -POSTHOOK: query: select * from part_null where p_name IN (select p_name from part_null) AND p_brand NOT IN (select p_type from part_null) +POSTHOOK: query: select * from part_null where p_name IN (select p_name from part_null) AND p_brand NOT IN (select p_type from part_null) order by part_null.p_partkey POSTHOOK: type: QUERY POSTHOOK: Input: default@part_null #### A masked pattern was here #### -121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h -121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h -144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about +15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder 
carefu +17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the 17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve 33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful -49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick +40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s 42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl -15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu -155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra +45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful +48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i +49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick +65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr +78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith 85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully -65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr -110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously -105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ -191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle -146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref -132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even -195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl +105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ +110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously 112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car -40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s -17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the -48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 
SM CASE 1375.42 hely ironic i -45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even +144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about +146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref +155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra +191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle 192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir -78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith +195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de Warning: Shuffle Join JOIN[28][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 3' is a cross product PREHOOK: query: explain select * from part_null where p_brand IN (select p_brand from part_null) AND p_brand NOT IN (select p_name from part_null) PREHOOK: type: QUERY @@ -1233,42 +1233,42 @@ STAGE PLANS: ListSink Warning: Shuffle Join JOIN[22][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 3' is a cross product -PREHOOK: query: select * from part_null where p_name IN (select p_name from part_null) AND EXISTS (select c from tnull) +PREHOOK: query: select * from part_null where p_name IN (select p_name from part_null) AND EXISTS (select c from tnull) order by part_null.p_partkey PREHOOK: type: QUERY PREHOOK: Input: default@part_null PREHOOK: Input: default@tnull #### A masked pattern was here #### -POSTHOOK: query: select * from part_null where p_name IN (select p_name from part_null) AND EXISTS (select c from tnull) +POSTHOOK: query: select * from part_null where p_name IN (select p_name from part_null) AND EXISTS (select c from tnull) order by part_null.p_partkey POSTHOOK: type: QUERY POSTHOOK: Input: default@part_null POSTHOOK: Input: default@tnull #### A masked pattern was here #### -192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir -90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl -85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull -42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl -105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ -48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i -86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages 
hinder carefu -45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful -65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr -132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even -195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de +17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the 17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve 33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful +40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s +42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl +45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful +48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i +49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick +65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr 78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith +85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull +86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully +90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl +105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ +110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously +112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car 121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h 121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h -17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the -49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick -112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car -40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s +132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even 144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about -110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to 
the furiously +146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref 155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra 191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle -146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref +192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir +195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de Warning: Shuffle Join JOIN[22][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 3' is a cross product PREHOOK: query: explain select * from part_null where p_size IN (select p_size from part_null) AND EXISTS (select c from tempty) PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/spark/subquery_notin.q.out ql/src/test/results/clientpositive/spark/subquery_notin.q.out index 0317288..1b2c088 100644 --- ql/src/test/results/clientpositive/spark/subquery_notin.q.out +++ ql/src/test/results/clientpositive/spark/subquery_notin.q.out @@ -1775,9 +1775,9 @@ POSTHOOK: Input: default@part 195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de 144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about Warning: Shuffle Join JOIN[26][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product -PREHOOK: query: explain select * from part where (p_size-1) NOT IN (select min(p_size) from part group by p_type) +PREHOOK: query: explain select * from part where (p_size-1) NOT IN (select min(p_size) from part group by p_type) order by p_partkey PREHOOK: type: QUERY -POSTHOOK: query: explain select * from part where (p_size-1) NOT IN (select min(p_size) from part group by p_type) +POSTHOOK: query: explain select * from part where (p_size-1) NOT IN (select min(p_size) from part group by p_type) order by p_partkey POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1787,12 +1787,13 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 9 (PARTITION-LEVEL SORT, 2) - Reducer 5 <- Map 4 (GROUP, 2) - Reducer 6 <- Reducer 5 (GROUP, 1) - Reducer 8 <- Map 4 (GROUP, 2) - Reducer 9 <- Reducer 8 (GROUP, 2) + Reducer 10 <- Reducer 9 (GROUP, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 7 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 10 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (SORT, 1) + Reducer 6 <- Map 5 (GROUP, 2) + Reducer 7 <- Reducer 6 (GROUP, 1) + Reducer 9 <- Map 5 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -1808,7 +1809,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Map 4 + Map 5 Map Operator Tree: TableScan alias: part @@ -1829,6 +1830,23 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 26 Data 
size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) + Reducer 10 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), true (type: boolean) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: boolean) Reducer 2 Reduce Operator Tree: Join Operator @@ -1862,14 +1880,25 @@ STAGE PLANS: expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 18 Data size: 2537 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + Statistics: Num rows: 18 Data size: 2537 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: string), VALUE._col4 (type: int), VALUE._col5 (type: string), VALUE._col6 (type: double), VALUE._col7 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 18 Data size: 2537 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 18 Data size: 2537 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0) @@ -1890,7 +1919,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint) - Reducer 6 + Reducer 7 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), count(VALUE._col1) @@ -1901,7 +1930,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint) - Reducer 8 + Reducer 9 Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0) @@ -1923,23 +1952,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reducer 9 - Reduce Operator Tree: - Group By Operator - keys: 
KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), true (type: boolean) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: boolean) Stage: Stage-0 Fetch Operator @@ -1948,30 +1960,30 @@ STAGE PLANS: ListSink Warning: Shuffle Join JOIN[26][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product -PREHOOK: query: select * from part where (p_size-1) NOT IN (select min(p_size) from part group by p_type) +PREHOOK: query: select * from part where (p_size-1) NOT IN (select min(p_size) from part group by p_type) order by p_partkey PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select * from part where (p_size-1) NOT IN (select min(p_size) from part group by p_type) +POSTHOOK: query: select * from part where (p_size-1) NOT IN (select min(p_size) from part group by p_type) order by p_partkey POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### -110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously +17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the +33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful +40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s 42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl -49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick -90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl -132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even 45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful -192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir 48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i -40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s -33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful -105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ -17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the +49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick +65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr 78486 almond azure blanched chiffon midnight Manufacturer#5 
Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith 85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull -65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr +90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl +105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ +110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously +132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even 144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about +192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir Warning: Shuffle Join JOIN[26][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product PREHOOK: query: explain select * from part where (p_partkey*p_size) NOT IN (select min(p_partkey) from part group by p_type) PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/spark/subquery_select.q.out ql/src/test/results/clientpositive/spark/subquery_select.q.out index e0651db..7d3a16b 100644 --- ql/src/test/results/clientpositive/spark/subquery_select.q.out +++ ql/src/test/results/clientpositive/spark/subquery_select.q.out @@ -2741,9 +2741,9 @@ POSTHOOK: Input: default@part 45 false Warning: Shuffle Join JOIN[23][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product Warning: Shuffle Join JOIN[26][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 3' is a cross product -PREHOOK: query: explain select p_type, (select p_size from part order by p_size limit 1) = 1 from part +PREHOOK: query: explain select p_type, (select p_size from part order by p_size limit 1) = 1 from part order by p_type PREHOOK: type: QUERY -POSTHOOK: query: explain select p_type, (select p_size from part order by p_size limit 1) = 1 from part +POSTHOOK: query: explain select p_type, (select p_size from part order by p_size limit 1) = 1 from part order by p_type POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -2753,11 +2753,12 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 8 (PARTITION-LEVEL SORT, 1) - Reducer 5 <- Map 4 (SORT, 1) - Reducer 6 <- Reducer 5 (GROUP, 1) - Reducer 8 <- Map 7 (SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 7 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 9 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (SORT, 1) + Reducer 6 <- Map 5 (SORT, 1) + Reducer 7 <- Reducer 6 (GROUP, 1) + Reducer 9 <- Map 8 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2773,7 +2774,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Map 4 + Map 5 Map Operator Tree: TableScan alias: part @@ -2786,7 +2787,7 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Map 7 + Map 8 Map Operator Tree: TableScan 
alias: part @@ -2828,14 +2829,25 @@ STAGE PLANS: expressions: _col0 (type: string), (_col2 = 1) (type: boolean) outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 6553 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + Statistics: Num rows: 26 Data size: 6553 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 + value expressions: _col1 (type: boolean) + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 6553 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 6553 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 Reduce Operator Tree: Select Operator Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE @@ -2851,7 +2863,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 6 + Reducer 7 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -2866,7 +2878,7 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reducer 8 + Reducer 9 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -2888,40 +2900,40 @@ STAGE PLANS: Warning: Shuffle Join JOIN[23][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product Warning: Shuffle Join JOIN[26][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 3' is a cross product -PREHOOK: query: select p_type, (select p_size from part order by p_size limit 1) = 1 from part +PREHOOK: query: select p_type, (select p_size from part order by p_size limit 1) = 1 from part order by p_type PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_type, (select p_size from part order by p_size limit 1) = 1 from part +POSTHOOK: query: select p_type, (select p_size from part order by p_size limit 1) = 1 from part order by p_type POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### -PROMO PLATED TIN true -PROMO PLATED TIN true +ECONOMY BRUSHED COPPER true +ECONOMY BURNISHED STEEL true +ECONOMY PLATED COPPER true +ECONOMY POLISHED STEEL true +LARGE BRUSHED BRASS true LARGE BRUSHED STEEL true -PROMO BURNISHED NICKEL true -STANDARD ANODIZED STEEL true LARGE BURNISHED STEEL true MEDIUM ANODIZED COPPER true -ECONOMY POLISHED STEEL true +MEDIUM BURNISHED BRASS true MEDIUM BURNISHED COPPER true -SMALL POLISHED NICKEL true -STANDARD PLATED TIN true +MEDIUM BURNISHED TIN true MEDIUM BURNISHED TIN true PROMO ANODIZED TIN true -MEDIUM BURNISHED BRASS true -ECONOMY PLATED COPPER true -STANDARD POLISHED STEEL true -SMALL BRUSHED BRASS true +PROMO BURNISHED NICKEL true +PROMO PLATED TIN true +PROMO PLATED TIN true PROMO POLISHED STEEL true +SMALL BRUSHED BRASS true 
+SMALL PLATED BRASS true SMALL PLATED STEEL true -ECONOMY BRUSHED COPPER true +SMALL POLISHED NICKEL true +STANDARD ANODIZED STEEL true STANDARD ANODIZED TIN true -MEDIUM BURNISHED TIN true STANDARD BURNISHED TIN true -SMALL PLATED BRASS true -ECONOMY BURNISHED STEEL true -LARGE BRUSHED BRASS true +STANDARD PLATED TIN true +STANDARD POLISHED STEEL true Warning: Shuffle Join JOIN[48][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Work 'Reducer 4' is a cross product PREHOOK: query: EXPLAIN SELECT p_size, p_size IN ( SELECT MAX(p_size) FROM part p where p.p_type = part.p_type) AND diff --git ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out index 4dfcc33..a709920 100644 --- ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out +++ ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out @@ -1215,7 +1215,7 @@ POSTHOOK: Lineage: decimal_mapjoin.cdecimal1 EXPRESSION [(alltypesorc)alltypesor POSTHOOK: Lineage: decimal_mapjoin.cdecimal2 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_mapjoin.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_mapjoin.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] -Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Map 1' is a cross product +Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 FROM decimal_mapjoin l JOIN decimal_mapjoin r ON l.cint = r.cint @@ -1235,7 +1235,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (XPROD_EDGE), Map 3 (XPROD_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1250,29 +1250,12 @@ STAGE PLANS: expressions: cdecimal1 (type: decimal(20,10)) outputColumnNames: _col0 Statistics: Num rows: 5 Data size: 551 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col2 - input vertices: - 1 Map 2 - Statistics: Num rows: 25 Data size: 5535 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 6981 (type: int), 6981 (type: int), _col0 (type: decimal(20,10)), _col2 (type: decimal(23,14)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 25 Data size: 5535 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 25 Data size: 5535 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + sort order: + Statistics: Num rows: 5 Data size: 551 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: decimal(20,10)) Execution mode: vectorized - Map 2 + Map 3 Map Operator Tree: TableScan alias: r @@ -1289,6 +1272,27 @@ STAGE PLANS: Statistics: Num rows: 5 Data size: 551 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(23,14)) Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col2 + Statistics: 
Num rows: 25 Data size: 5535 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 6981 (type: int), 6981 (type: int), _col0 (type: decimal(20,10)), _col2 (type: decimal(23,14)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 25 Data size: 5535 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 25 Data size: 5535 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1296,7 +1300,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Map 1' is a cross product +Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 FROM decimal_mapjoin l JOIN decimal_mapjoin r ON l.cint = r.cint @@ -1312,8 +1316,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_mapjoin #### A masked pattern was here #### 6981 6981 NULL NULL -6981 6981 NULL -617.56077692307690 -6981 6981 NULL -617.56077692307690 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL @@ -1321,14 +1323,13 @@ POSTHOOK: Input: default@decimal_mapjoin 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL -6981 6981 NULL NULL 6981 6981 NULL -617.56077692307690 6981 6981 NULL -617.56077692307690 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL -6981 6981 NULL 6984454.21109769200000 6981 6981 NULL NULL +6981 6981 NULL 6984454.21109769200000 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL @@ -1337,8 +1338,8 @@ POSTHOOK: Input: default@decimal_mapjoin 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL -6981 6981 NULL 6984454.21109769200000 6981 6981 NULL NULL +6981 6981 NULL 6984454.21109769200000 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL @@ -1347,13 +1348,14 @@ POSTHOOK: Input: default@decimal_mapjoin 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL +6981 6981 NULL NULL 6981 6981 NULL 6984454.21109769200000 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL +6981 6981 NULL -617.56077692307690 +6981 6981 NULL -617.56077692307690 6981 6981 5831542.2692483780 NULL -6981 6981 5831542.2692483780 -617.56077692307690 -6981 6981 5831542.2692483780 -617.56077692307690 6981 6981 5831542.2692483780 NULL 6981 6981 5831542.2692483780 NULL 6981 6981 5831542.2692483780 NULL @@ -1361,9 +1363,9 @@ POSTHOOK: Input: default@decimal_mapjoin 6981 6981 5831542.2692483780 NULL 6981 6981 5831542.2692483780 NULL 6981 6981 5831542.2692483780 NULL +6981 6981 5831542.2692483780 -617.56077692307690 +6981 6981 5831542.2692483780 -617.56077692307690 6981 6981 NULL NULL -6981 6981 NULL -617.56077692307690 -6981 6981 NULL -617.56077692307690 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL @@ -1371,14 +1373,13 @@ POSTHOOK: Input: default@decimal_mapjoin 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL -6981 6981 NULL NULL 6981 6981 NULL -617.56077692307690 6981 6981 NULL -617.56077692307690 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL -6981 6981 NULL 6984454.21109769200000 6981 6981 NULL NULL +6981 6981 NULL 6984454.21109769200000 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL @@ -1387,13 +1388,14 @@ POSTHOOK: Input: default@decimal_mapjoin 6981 6981 NULL NULL 6981 6981 
NULL NULL 6981 6981 NULL NULL +6981 6981 NULL NULL 6981 6981 NULL 6984454.21109769200000 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL +6981 6981 NULL -617.56077692307690 +6981 6981 NULL -617.56077692307690 6981 6981 -515.6210729730 NULL -6981 6981 -515.6210729730 -617.56077692307690 -6981 6981 -515.6210729730 -617.56077692307690 6981 6981 -515.6210729730 NULL 6981 6981 -515.6210729730 NULL 6981 6981 -515.6210729730 NULL @@ -1401,17 +1403,19 @@ POSTHOOK: Input: default@decimal_mapjoin 6981 6981 -515.6210729730 NULL 6981 6981 -515.6210729730 NULL 6981 6981 -515.6210729730 NULL -6981 6981 -515.6210729730 NULL 6981 6981 -515.6210729730 -617.56077692307690 6981 6981 -515.6210729730 -617.56077692307690 6981 6981 -515.6210729730 NULL 6981 6981 -515.6210729730 NULL 6981 6981 -515.6210729730 NULL +6981 6981 -515.6210729730 NULL 6981 6981 -515.6210729730 6984454.21109769200000 6981 6981 -515.6210729730 NULL 6981 6981 -515.6210729730 NULL 6981 6981 -515.6210729730 NULL -Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Map 1' is a cross product +6981 6981 -515.6210729730 -617.56077692307690 +6981 6981 -515.6210729730 -617.56077692307690 +Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 FROM decimal_mapjoin l JOIN decimal_mapjoin r ON l.cint = r.cint @@ -1431,7 +1435,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (XPROD_EDGE), Map 3 (XPROD_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1446,29 +1450,12 @@ STAGE PLANS: expressions: cdecimal1 (type: decimal(20,10)) outputColumnNames: _col0 Statistics: Num rows: 5 Data size: 551 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col2 - input vertices: - 1 Map 2 - Statistics: Num rows: 25 Data size: 5535 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 6981 (type: int), 6981 (type: int), _col0 (type: decimal(20,10)), _col2 (type: decimal(23,14)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 25 Data size: 5535 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 25 Data size: 5535 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + sort order: + Statistics: Num rows: 5 Data size: 551 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: decimal(20,10)) Execution mode: vectorized - Map 2 + Map 3 Map Operator Tree: TableScan alias: r @@ -1485,6 +1472,27 @@ STAGE PLANS: Statistics: Num rows: 5 Data size: 551 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(23,14)) Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col2 + Statistics: Num rows: 25 Data size: 5535 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 6981 (type: int), 6981 (type: int), _col0 (type: decimal(20,10)), _col2 (type: decimal(23,14)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 25 Data size: 5535 Basic stats: COMPLETE Column stats: 
NONE + File Output Operator + compressed: false + Statistics: Num rows: 25 Data size: 5535 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1492,7 +1500,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Map 1' is a cross product +Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 FROM decimal_mapjoin l JOIN decimal_mapjoin r ON l.cint = r.cint @@ -1508,8 +1516,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_mapjoin #### A masked pattern was here #### 6981 6981 NULL NULL -6981 6981 NULL -617.56077692307690 -6981 6981 NULL -617.56077692307690 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL @@ -1517,14 +1523,13 @@ POSTHOOK: Input: default@decimal_mapjoin 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL -6981 6981 NULL NULL 6981 6981 NULL -617.56077692307690 6981 6981 NULL -617.56077692307690 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL -6981 6981 NULL 6984454.21109769200000 6981 6981 NULL NULL +6981 6981 NULL 6984454.21109769200000 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL @@ -1533,8 +1538,8 @@ POSTHOOK: Input: default@decimal_mapjoin 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL -6981 6981 NULL 6984454.21109769200000 6981 6981 NULL NULL +6981 6981 NULL 6984454.21109769200000 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL @@ -1543,13 +1548,14 @@ POSTHOOK: Input: default@decimal_mapjoin 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL +6981 6981 NULL NULL 6981 6981 NULL 6984454.21109769200000 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL +6981 6981 NULL -617.56077692307690 +6981 6981 NULL -617.56077692307690 6981 6981 5831542.2692483780 NULL -6981 6981 5831542.2692483780 -617.56077692307690 -6981 6981 5831542.2692483780 -617.56077692307690 6981 6981 5831542.2692483780 NULL 6981 6981 5831542.2692483780 NULL 6981 6981 5831542.2692483780 NULL @@ -1557,9 +1563,9 @@ POSTHOOK: Input: default@decimal_mapjoin 6981 6981 5831542.2692483780 NULL 6981 6981 5831542.2692483780 NULL 6981 6981 5831542.2692483780 NULL +6981 6981 5831542.2692483780 -617.56077692307690 +6981 6981 5831542.2692483780 -617.56077692307690 6981 6981 NULL NULL -6981 6981 NULL -617.56077692307690 -6981 6981 NULL -617.56077692307690 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL @@ -1567,14 +1573,13 @@ POSTHOOK: Input: default@decimal_mapjoin 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL -6981 6981 NULL NULL 6981 6981 NULL -617.56077692307690 6981 6981 NULL -617.56077692307690 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL -6981 6981 NULL 6984454.21109769200000 6981 6981 NULL NULL +6981 6981 NULL 6984454.21109769200000 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL @@ -1583,13 +1588,14 @@ POSTHOOK: Input: default@decimal_mapjoin 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL +6981 6981 NULL NULL 6981 6981 NULL 6984454.21109769200000 6981 6981 NULL NULL 6981 6981 NULL NULL 6981 6981 NULL NULL +6981 6981 NULL -617.56077692307690 +6981 6981 NULL -617.56077692307690 6981 6981 -515.6210729730 NULL -6981 6981 -515.6210729730 -617.56077692307690 -6981 6981 
-515.6210729730 -617.56077692307690 6981 6981 -515.6210729730 NULL 6981 6981 -515.6210729730 NULL 6981 6981 -515.6210729730 NULL @@ -1597,16 +1603,18 @@ POSTHOOK: Input: default@decimal_mapjoin 6981 6981 -515.6210729730 NULL 6981 6981 -515.6210729730 NULL 6981 6981 -515.6210729730 NULL -6981 6981 -515.6210729730 NULL 6981 6981 -515.6210729730 -617.56077692307690 6981 6981 -515.6210729730 -617.56077692307690 6981 6981 -515.6210729730 NULL 6981 6981 -515.6210729730 NULL 6981 6981 -515.6210729730 NULL +6981 6981 -515.6210729730 NULL 6981 6981 -515.6210729730 6984454.21109769200000 6981 6981 -515.6210729730 NULL 6981 6981 -515.6210729730 NULL 6981 6981 -515.6210729730 NULL +6981 6981 -515.6210729730 -617.56077692307690 +6981 6981 -515.6210729730 -617.56077692307690 PREHOOK: query: DROP TABLE decimal_mapjoin PREHOOK: type: DROPTABLE PREHOOK: Input: default@decimal_mapjoin
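Note on reproducing the new plan shape (an illustrative sketch, not part of the patch): the XPROD_EDGE pairs asserted in the golden files above come from joins that Hive flags as cross products. In hybridgrace_hashjoin_1.q.out the join key folds to the constant 6981 (visible in the Select Operator expressions), so the equi-join degenerates into a cross product; with the cartesian product edge in effect, the DAG routes both sides through XPROD_EDGE into a Merge Join reducer instead of broadcasting one side into a Map Join. Assuming the same decimal_mapjoin table, and with the constant-key predicate inferred (the exact WHERE clause is truncated in the hunks above):

-- Hypothetical repro; the predicate is inferred from the folded 6981 constants in the plan.
EXPLAIN
SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2
FROM decimal_mapjoin l
JOIN decimal_mapjoin r ON l.cint = r.cint
WHERE l.cint = 6981;

-- Expected warning and Tez edge, matching the updated golden output above:
--   Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
--   Reducer 2 <- Map 1 (XPROD_EDGE), Map 3 (XPROD_EDGE)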