diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 60ac0c0..da94d50 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2564,8 +2564,6 @@ public void setSparkConfigUpdated(boolean isSparkConfigUpdated) { HIVE_LOG_TRACE_ID("hive.log.trace.id", "", "Log tracing id that can be used by upstream clients for tracking respective logs. " + "Truncated to " + LOG_PREFIX_LENGTH + " characters. Defaults to use auto-generated session id."), - - HIVE_CONF_RESTRICTED_LIST("hive.conf.restricted.list", "hive.security.authenticator.manager,hive.security.authorization.manager,hive.users.in.admin.role", "Comma separated list of configuration options which are immutable at runtime"), @@ -2574,7 +2572,14 @@ public void setSparkConfigUpdated(boolean isSparkConfigUpdated) { "Comma separated list of configuration options which should not be read by normal user like passwords"), HIVE_CONF_INTERNAL_VARIABLE_LIST("hive.conf.internal.variable.list", "hive.added.files.path,hive.added.jars.path,hive.added.archives.path", - "Comma separated list of variables which are used internally and should not be configurable."); + "Comma separated list of variables which are used internally and should not be configurable."), + HIVE_TEZ_ENABLE_MEMORY_MANAGER_FOR_HASH_JOIN("hive.tez.enable.memory.manager.for.hash.join", + true, "Enable the memory manager for hash join operators in Tez"), + HIVE_TEZ_DEFAULT_BUFFER_SIZE("hive.tez.default.buffer.size", 32 * 1024 * 1024L, + "Tez default sort buffer size. Default is 32 MB. Set this appropriately to " + + "the number of tables in your query * the largest row size among these tables. " + + "For example, if you have a 2-way join and the row sizes of the 2 tables are 1 MB and 10 KB, " + + "set this value to at least 2 * 1 MB. It is highly recommended to keep this value above 32 MB."); public final String varname; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java index 6e196e6..44e2b06 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java @@ -144,6 +144,7 @@ "hive.tez.current.merge.file.prefix"; // A comma separated list of work names used as prefixes. public static final String TEZ_MERGE_WORK_FILE_PREFIXES = "hive.tez.merge.file.prefixes"; + private static final int ONE_MB = 1024 * 1024; private void addCredentials(MapWork mapWork, DAG dag) { Set paths = mapWork.getPathToAliases().keySet(); @@ -338,7 +339,7 @@ public Edge createEdge(JobConf vConf, Vertex v, Vertex w, TezEdgeProperty edgePr * Helper function to create an edge property from an edge type.
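The sizing rule in the hive.tez.default.buffer.size description above is easy to get wrong, so here is the arithmetic as a minimal, runnable sketch; the class and method names are illustrative and not part of the patch:

```java
public final class SortBufferSizing {
  private static final long DEFAULT_BUFFER_SIZE = 32L * 1024 * 1024; // 32 MB, the config default

  // Recommended value for hive.tez.default.buffer.size: number of joined
  // tables * the largest row size among them, but never below the default.
  static long recommendedBufferBytes(int numTables, long largestRowSizeBytes) {
    return Math.max((long) numTables * largestRowSizeBytes, DEFAULT_BUFFER_SIZE);
  }

  public static void main(String[] args) {
    // The example from the description: a 2-way join with 1 MB and 10 KB rows.
    long recommended = recommendedBufferBytes(2, 1024L * 1024);
    System.out.println(recommended); // 2 * 1 MB is well under 32 MB, so the floor wins
  }
}
```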
*/ private EdgeProperty createEdgeProperty(TezEdgeProperty edgeProp, Configuration conf) - throws IOException { + throws IOException { MRHelpers.translateMRConfToTez(conf); String keyClass = conf.get(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS); String valClass = conf.get(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS); @@ -348,22 +349,23 @@ private EdgeProperty createEdgeProperty(TezEdgeProperty edgeProp, Configuration EdgeType edgeType = edgeProp.getEdgeType(); switch (edgeType) { case BROADCAST_EDGE: - UnorderedKVEdgeConfig et1Conf = UnorderedKVEdgeConfig - .newBuilder(keyClass, valClass) - .setFromConfiguration(conf) - .setKeySerializationClass(TezBytesWritableSerialization.class.getName(), null) - .setValueSerializationClass(TezBytesWritableSerialization.class.getName(), null) - .build(); + UnorderedKVEdgeConfig et1Conf = + UnorderedKVEdgeConfig.newBuilder(keyClass, valClass).setFromConfiguration(conf) + .setKeySerializationClass(TezBytesWritableSerialization.class.getName(), null) + .setValueSerializationClass(TezBytesWritableSerialization.class.getName(), null) + .build(); return et1Conf.createDefaultBroadcastEdgeProperty(); + case CUSTOM_EDGE: assert partitionerClassName != null; partitionerConf = createPartitionerConf(partitionerClassName, conf); - UnorderedPartitionedKVEdgeConfig et2Conf = UnorderedPartitionedKVEdgeConfig - .newBuilder(keyClass, valClass, MRPartitioner.class.getName(), partitionerConf) - .setFromConfiguration(conf) - .setKeySerializationClass(TezBytesWritableSerialization.class.getName(), null) - .setValueSerializationClass(TezBytesWritableSerialization.class.getName(), null) - .build(); + UnorderedPartitionedKVEdgeConfig et2Conf = + UnorderedPartitionedKVEdgeConfig + .newBuilder(keyClass, valClass, MRPartitioner.class.getName(), partitionerConf) + .setFromConfiguration(conf) + .setKeySerializationClass(TezBytesWritableSerialization.class.getName(), null) + .setValueSerializationClass(TezBytesWritableSerialization.class.getName(), null) + .build(); EdgeManagerPluginDescriptor edgeDesc = EdgeManagerPluginDescriptor.create(CustomPartitionEdge.class.getName()); CustomEdgeConfiguration edgeConf = @@ -373,27 +375,41 @@ private EdgeProperty createEdgeProperty(TezEdgeProperty edgeProp, Configuration byte[] userPayload = dob.getData(); edgeDesc.setUserPayload(UserPayload.create(ByteBuffer.wrap(userPayload))); return et2Conf.createDefaultCustomEdgeProperty(edgeDesc); + case CUSTOM_SIMPLE_EDGE: assert partitionerClassName != null; partitionerConf = createPartitionerConf(partitionerClassName, conf); - UnorderedPartitionedKVEdgeConfig et3Conf = UnorderedPartitionedKVEdgeConfig - .newBuilder(keyClass, valClass, MRPartitioner.class.getName(), partitionerConf) - .setFromConfiguration(conf) - .setKeySerializationClass(TezBytesWritableSerialization.class.getName(), null) - .setValueSerializationClass(TezBytesWritableSerialization.class.getName(), null) - .build(); + UnorderedPartitionedKVEdgeConfig et3Conf = + UnorderedPartitionedKVEdgeConfig + .newBuilder(keyClass, valClass, MRPartitioner.class.getName(), partitionerConf) + .setFromConfiguration(conf) + .setKeySerializationClass(TezBytesWritableSerialization.class.getName(), null) + .setValueSerializationClass(TezBytesWritableSerialization.class.getName(), null) + .build(); return et3Conf.createDefaultEdgeProperty(); + case SIMPLE_EDGE: default: assert partitionerClassName != null; partitionerConf = createPartitionerConf(partitionerClassName, conf); - OrderedPartitionedKVEdgeConfig et4Conf = OrderedPartitionedKVEdgeConfig + 
OrderedPartitionedKVEdgeConfig.Builder builder = + OrderedPartitionedKVEdgeConfig .newBuilder(keyClass, valClass, MRPartitioner.class.getName(), partitionerConf) .setFromConfiguration(conf) .setKeySerializationClass(TezBytesWritableSerialization.class.getName(), TezBytesComparator.class.getName(), null) - .setValueSerializationClass(TezBytesWritableSerialization.class.getName(), null) - .build(); + .setValueSerializationClass(TezBytesWritableSerialization.class.getName(), null); + + boolean memoryManagerEnabled = + HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_TEZ_ENABLE_MEMORY_MANAGER); + if (memoryManagerEnabled) { + LOG.info( + "Setting the sort buffer memory required to " + edgeProp.getSortBufferMemRequiredMB() + " MB"); + builder = builder.configureOutput() + .setSortBufferSize((int) edgeProp.getSortBufferMemRequiredMB()).done(); + } + + OrderedPartitionedKVEdgeConfig et4Conf = builder.build(); return et4Conf.createDefaultEdgeProperty(); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java index ea89cf0..d3e78f1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java @@ -54,6 +54,7 @@ import org.apache.hadoop.hive.ql.parse.OptimizeTezProcContext; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.CommonMergeJoinDesc; +import org.apache.hadoop.hive.ql.plan.DummyStoreDesc; import org.apache.hadoop.hive.ql.plan.DynamicPruningEventDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; @@ -305,6 +306,8 @@ private void convertJoinSMBJoin(JoinOperator joinOp, OptimizeTezProcContext cont dummyStoreOp.getParentOperators().add(parentOp); mergeJoinOp.getParentOperators().remove(parentIndex); mergeJoinOp.getParentOperators().add(parentIndex, dummyStoreOp); + dummyStoreOp.setConf(new DummyStoreDesc()); + dummyStoreOp.setStatistics(parentOp.getStatistics()); } } mergeJoinOp.cloneOriginalParentsList(mergeJoinOp.getParentOperators()); @@ -370,7 +373,7 @@ private boolean checkConvertJoinSMBJoin(JoinOperator joinOp, OptimizeTezProcCont // MapRecordProcessor and ReduceRecordProcessor with respect to the sources. @SuppressWarnings({"rawtypes","unchecked"}) Set set = - OperatorUtils.findOperatorsUpstream((Collection)parentOp.getParentOperators(), + OperatorUtils.findOperatorsUpstream(parentOp.getParentOperators(), ReduceSinkOperator.class); if (size < 0) { size = set.size(); @@ -587,7 +590,7 @@ public int getMapJoinConversionPos(JoinOperator joinOp, OptimizeTezProcContext c // on size and there's another one that's bigger.
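For readers who do not work with the Tez runtime API daily, the DagUtils hunk above is the heart of the change: the ordered edge's sort buffer is pinned only when the memory manager is enabled. Below is the same builder pattern reduced to its essentials; the key, value, and partitioner class names are placeholders, the signatures are taken from the patch's own calls, and only the configureOutput().setSortBufferSize(...) step (which takes megabytes) is the point:

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.tez.runtime.library.conf.OrderedPartitionedKVEdgeConfig;

// Sketch: build an ordered, partitioned Tez edge with an explicit sort
// buffer, the same call chain the patched SIMPLE_EDGE branch uses.
public final class SortedEdgeSketch {
  static OrderedPartitionedKVEdgeConfig buildEdge(Configuration conf, int sortBufferMb) {
    OrderedPartitionedKVEdgeConfig.Builder builder = OrderedPartitionedKVEdgeConfig
        .newBuilder("some.KeyClass", "some.ValueClass",   // placeholder class names
            "some.PartitionerClass", null)                // partitioner config omitted here
        .setFromConfiguration(conf);
    // Pin the output sort buffer for this edge only; the patch does this
    // only when the Hive memory manager is enabled.
    builder = builder.configureOutput().setSortBufferSize(sortBufferMb).done();
    return builder.build();
  }
}
```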
return -1; } - + if (inputSize/buckets > maxSize) { if (!bigTableCandidateSet.contains(pos)) { // can't use the current table as the big table, but it's too diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java index d5c3a2d..8e614cf 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java @@ -280,7 +280,8 @@ public static Object processReduceSinkToHashJoin(ReduceSinkOperator parentRS, Ma // disable auto parallelism for bucket map joins parentRS.getConf().setReducerTraits(EnumSet.of(FIXED)); } - TezEdgeProperty edgeProp = new TezEdgeProperty(null, edgeType, numBuckets); + TezEdgeProperty edgeProp = + new TezEdgeProperty(null, edgeType, numBuckets, parentRS.getStatistics().getDataSize()); if (mapJoinWork != null) { for (BaseWork myWork: mapJoinWork) { diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/OpTraitsRulesProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/OpTraitsRulesProcFactory.java index 7149f5c..79b882b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/OpTraitsRulesProcFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/OpTraitsRulesProcFactory.java @@ -284,7 +284,6 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, List> bucketColsList = new ArrayList>(); List> sortColsList = new ArrayList>(); byte pos = 0; - int numReduceSinks = 0; // will be set to the larger of the parents for (Operator parentOp : joinOp.getParentOperators()) { if (!(parentOp instanceof ReduceSinkOperator)) { // can be mux operator diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MemoryDecider.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MemoryDecider.java index 3a20cfe..14f5477 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MemoryDecider.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MemoryDecider.java @@ -22,7 +22,6 @@ import java.util.Comparator; import java.util.Iterator; import java.util.HashMap; -import java.util.HashSet; import java.util.Map; import java.util.LinkedHashMap; import java.util.LinkedHashSet; @@ -35,12 +34,10 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.GroupByOperator; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; -import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; -import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.StatsTask; import org.apache.hadoop.hive.ql.exec.Task; -import org.apache.hadoop.hive.ql.exec.tez.DagUtils; import org.apache.hadoop.hive.ql.exec.tez.TezTask; import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; @@ -55,12 +52,15 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.BaseWork; +import org.apache.hadoop.hive.ql.plan.GroupByDesc; import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.MergeJoinWork; import org.apache.hadoop.hive.ql.plan.ReduceWork; import org.apache.hadoop.hive.ql.plan.TezEdgeProperty; import org.apache.hadoop.hive.ql.plan.TezEdgeProperty.EdgeType; import 
org.apache.hadoop.hive.ql.plan.TezWork; +import org.apache.hadoop.hive.ql.plan.UnionWork; +import org.apache.tez.runtime.library.api.TezRuntimeConfiguration; /** * MemoryDecider is a simple physical optimizer that adjusts the memory layout of tez tasks. @@ -71,20 +71,28 @@ public class MemoryDecider implements PhysicalPlanResolver { protected static transient final Logger LOG = LoggerFactory.getLogger(MemoryDecider.class); + private TezWork tezWork = null; public class MemoryCalculator implements Dispatcher { - private final long totalAvailableMemory; // how much to we have + private final long mapJoinTotalAvailableMemory; // how much do we have private final long minimumHashTableSize; // minimum size of ht completely in memory private final double inflationFactor; // blowout factor datasize -> memory size private final PhysicalContext pctx; + private final long defaultBufferSize; + private final long ioMemory; public MemoryCalculator(PhysicalContext pctx) { this.pctx = pctx; - this.totalAvailableMemory = HiveConf.getLongVar(pctx.conf, HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD); + this.mapJoinTotalAvailableMemory = HiveConf.getLongVar(pctx.conf, HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD); this.minimumHashTableSize = HiveConf.getIntVar(pctx.conf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMINNUMPARTITIONS) * HiveConf.getIntVar(pctx.conf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMINWBSIZE); this.inflationFactor = HiveConf.getFloatVar(pctx.conf, HiveConf.ConfVars.HIVE_HASH_TABLE_INFLATION_FACTOR); + this.defaultBufferSize = + HiveConf.getLongVar(pctx.conf, HiveConf.ConfVars.HIVE_TEZ_DEFAULT_BUFFER_SIZE); + LOG.info("Hive tez default buffer size: " + defaultBufferSize); + this.ioMemory = this.pctx.getConf().getLong(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, + defaultBufferSize); } @SuppressWarnings("unchecked") @@ -96,8 +104,8 @@ public Object dispatch(Node nd, Stack stack, Object... nodeOutputs) currTask = ((StatsTask) currTask).getWork().getSourceTask(); } if (currTask instanceof TezTask) { - TezWork work = ((TezTask) currTask).getWork(); - for (BaseWork w : work.getAllWork()) { + tezWork = ((TezTask) currTask).getWork(); + for (BaseWork w : tezWork.getAllWork()) { evaluateWork(w); } } @@ -105,18 +113,47 @@ public Object dispatch(Node nd, Stack stack, Object... nodeOutputs) } private void evaluateWork(BaseWork w) throws SemanticException { - + for (TezEdgeProperty edge : getInputEdges(w)) { + if (edge.getEdgeType() == EdgeType.SIMPLE_EDGE) { + if ((edge.getSortBufferMemRequiredMB() * 1024 * 1024) < defaultBufferSize) { + // hasn't been set. Default to configuration.
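Since the units in this comparison are easy to misread (getSortBufferMemRequiredMB() reports megabytes while hive.tez.default.buffer.size is bytes), the defaulting rule is restated below as a self-contained sketch; the helper name is invented for illustration:

```java
// Sketch of the SIMPLE_EDGE defaulting rule in evaluateWork(): an edge whose
// required sort buffer (tracked in MB) falls below the configured default
// (in bytes) is raised to that default.
public final class SortBufferDefaulting {
  static long effectiveSortBufferBytes(long requiredMb, long defaultBufferSizeBytes) {
    long requiredBytes = requiredMb * 1024 * 1024;
    return requiredBytes < defaultBufferSizeBytes ? defaultBufferSizeBytes : requiredBytes;
  }

  public static void main(String[] args) {
    // A 4 MB requirement against the 32 MB default gets bumped to 32 MB.
    System.out.println(effectiveSortBufferBytes(4, 32L * 1024 * 1024));
  }
}
```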
+ LOG.info("Setting the sort buffer memory required to " + defaultBufferSize); + edge.setSortBufferMemRequired(defaultBufferSize); + } + } + } if (w instanceof MapWork) { evaluateMapWork((MapWork) w); } else if (w instanceof ReduceWork) { evaluateReduceWork((ReduceWork) w); } else if (w instanceof MergeJoinWork) { evaluateMergeWork((MergeJoinWork) w); + } else if (w instanceof UnionWork) { + evaluateUnionWork((UnionWork) w, getInputEdges(w), getOutputEdges(w)); } else { LOG.info("We are not going to evaluate this work type: " + w.getClass().getCanonicalName()); } } + private void evaluateUnionWork(UnionWork w, List inputEdges, + List outputEdges) { + List realOutputEdges = new ArrayList(); + long totalInputSize = 0; + for (TezEdgeProperty edge : outputEdges) { + if (edge.getEdgeType() == TezEdgeProperty.EdgeType.CONTAINS) { + totalInputSize += edge.getDataFlowSize(); + } else { + realOutputEdges.add(edge); + } + } + + LOG.info("Real total input size for union " + w.getName() + " is " + totalInputSize); + for (TezEdgeProperty edge : realOutputEdges) { + edge.setDataFlowSize(totalInputSize); + } + return; + } + private void evaluateMergeWork(MergeJoinWork w) throws SemanticException { for (BaseWork baseWork : w.getBaseWorkList()) { evaluateOperators(baseWork, pctx); @@ -146,6 +183,16 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, return null; } }); + final Set gbyOps = new LinkedHashSet(); + rules.put(new RuleRegExp("Sort buffer estimator", GroupByOperator.getOperatorName() + "%"), + new NodeProcessor() { + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) { + gbyOps.add((GroupByOperator) nd); + return null; + } + }); disp = new DefaultRuleDispatcher(null, rules, null); GraphWalker ogw = new DefaultGraphWalker(disp); @@ -156,6 +203,10 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, LinkedHashMap nodeOutput = new LinkedHashMap(); ogw.startWalking(topNodes, nodeOutput); + if (gbyOps.size() != 0) { + evaluateGbyOps(w, gbyOps); + } + if (mapJoins.size() == 0) { return; } @@ -174,6 +225,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, } Comparator comp = new Comparator() { + @Override public int compare(MapJoinOperator mj1, MapJoinOperator mj2) { if (mj1 == null || mj2 == null) { throw new NullPointerException(); @@ -190,7 +242,7 @@ public int compare(MapJoinOperator mj1, MapJoinOperator mj2) { SortedSet sortedMapJoins = new TreeSet(comp); sortedMapJoins.addAll(mapJoins); - long remainingSize = totalAvailableMemory / 2; + long remainingSize = mapJoinTotalAvailableMemory / 2; Iterator it = sortedMapJoins.iterator(); @@ -221,15 +273,15 @@ public int compare(MapJoinOperator mj1, MapJoinOperator mj2) { sortedMapJoins.addAll(mapJoins); totalLargeJoins = total; - if (totalLargeJoins > totalAvailableMemory) { + if (totalLargeJoins > mapJoinTotalAvailableMemory) { // this shouldn't happen throw new HiveException(); } - remainingSize = totalAvailableMemory / 2; + remainingSize = mapJoinTotalAvailableMemory / 2; } // we used half the mem for small joins, now let's scale the rest - double weight = (remainingSize + totalAvailableMemory / 2) / (double) totalLargeJoins; + double weight = (remainingSize + mapJoinTotalAvailableMemory / 2) / (double) totalLargeJoins; for (MapJoinOperator mj : sortedMapJoins) { long size = (long)(weight * sizes.get(mj)); @@ -241,7 +293,7 @@ public int compare(MapJoinOperator mj1, MapJoinOperator mj2) { } } catch (HiveException e) { // if we have 
issues with stats, just scale linearly - long size = totalAvailableMemory / mapJoins.size(); + long size = mapJoinTotalAvailableMemory / mapJoins.size(); if (LOG.isInfoEnabled()) { LOG.info("Scaling mapjoin memory w/o stats"); } @@ -255,6 +307,47 @@ public int compare(MapJoinOperator mj1, MapJoinOperator mj2) { } } + private List getOutputEdges(BaseWork w) { + List outputEdges = new ArrayList(); + + for (BaseWork b : tezWork.getChildren(w)) { + outputEdges.add(tezWork.getEdgeProperty(w, b)); + } + + return outputEdges; + } + + private List getInputEdges(BaseWork w) { + List inputEdges = new ArrayList(); + for (BaseWork b : tezWork.getParents(w)) { + inputEdges.add(tezWork.getEdgeProperty(b, w)); + } + + return inputEdges; + } + + private void evaluateGbyOps(BaseWork w, Set gbyOps) { + for (GroupByOperator gbyOp : gbyOps) { + if (gbyOp.getConf().getMode() == GroupByDesc.Mode.HASH) { + // if it is a map-side aggregation, lower the io memory to the estimated data size of + // the gby. otherwise do nothing. Based on experiments we have run with TPC-DS data, + // this gives us the right amount of memory. + long memoryNeeded = gbyOp.getConf().getStatistics().getDataSize(); + if (this.ioMemory > memoryNeeded) { + // find the outgoing edges from this work item and set the memory required to the data + // size in this case. + if (memoryNeeded < defaultBufferSize) { + memoryNeeded = defaultBufferSize; + } + List outputEdges = getOutputEdges(w); + for (TezEdgeProperty edge : outputEdges) { + edge.setSortBufferMemRequired(memoryNeeded); + } + } + } + } + } + private long computeSizeToFitInMem(MapJoinOperator mj) throws HiveException { return (long) (Math.max(this.minimumHashTableSize, computeInputSize(mj)) * this.inflationFactor); } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java index 70912e0..f02f12a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java @@ -134,9 +134,10 @@ public static ReduceWork createReduceWork( if (reduceWork.isAutoReduceParallelism()) { edgeProp = new TezEdgeProperty(context.conf, edgeType, true, - reduceWork.getMinReduceTasks(), reduceWork.getMaxReduceTasks(), bytesPerReducer); + reduceWork.getMinReduceTasks(), reduceWork.getMaxReduceTasks(), bytesPerReducer, + reduceSink.getStatistics().getDataSize()); } else { - edgeProp = new TezEdgeProperty(edgeType); + edgeProp = new TezEdgeProperty(edgeType, reduceSink.getStatistics().getDataSize()); } tezWork.connect( diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java index 1a49de1..be92cfe 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java @@ -32,6 +32,7 @@ import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.OperatorFactory; +import org.apache.hadoop.hive.ql.exec.OperatorUtils; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; import org.apache.hadoop.hive.ql.exec.RowSchema; import org.apache.hadoop.hive.ql.lib.Node; @@ -129,6 +130,7 @@ public Object process(Node nd, Stack stack, } else { work = GenTezUtils.createReduceWork(context, root, tezWork); } + work.setMemoryNeeded(operator.getStatistics().getDataSize()); context.rootToWorkMap.put(root, work); } @@ -452,9 +454,10 @@ public Object process(Node nd, Stack stack, if
(rWork.isAutoReduceParallelism()) { edgeProp = new TezEdgeProperty(context.conf, edgeType, true, - rWork.getMinReduceTasks(), rWork.getMaxReduceTasks(), bytesPerReducer); + rWork.getMinReduceTasks(), rWork.getMaxReduceTasks(), bytesPerReducer, + rs.getStatistics().getDataSize()); } else { - edgeProp = new TezEdgeProperty(edgeType); + edgeProp = new TezEdgeProperty(edgeType, rs.getStatistics().getDataSize()); } tezWork.connect(work, followingWork, edgeProp); context.connectedReduceSinks.add(rs); @@ -499,7 +502,7 @@ private int getFollowingWorkIndex(TezWork tezWork, UnionWork unionWork, ReduceSi private void connectUnionWorkWithWork(UnionWork unionWork, BaseWork work, TezWork tezWork, GenTezProcContext context) { LOG.debug("Connecting union work (" + unionWork + ") with work (" + work + ")"); - TezEdgeProperty edgeProp = new TezEdgeProperty(EdgeType.CONTAINS); + TezEdgeProperty edgeProp = new TezEdgeProperty(EdgeType.CONTAINS, work.getMemoryNeeded()); tezWork.connect(unionWork, work, edgeProp); unionWork.addUnionOperators(context.currentUnionOperators); context.workWithUnionOperators.add(work); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java index ff971ac..b45960e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java @@ -482,8 +482,9 @@ protected void optimizeTaskPlan(List> rootTasks, Pa LOG.debug("Skipping stage id rearranger"); } - if ((conf.getBoolVar(HiveConf.ConfVars.HIVE_TEZ_ENABLE_MEMORY_MANAGER)) - && (conf.getBoolVar(HiveConf.ConfVars.HIVEUSEHYBRIDGRACEHASHJOIN))) { + if (((conf.getBoolVar(HiveConf.ConfVars.HIVE_TEZ_ENABLE_MEMORY_MANAGER_FOR_HASH_JOIN)) + && (conf.getBoolVar(HiveConf.ConfVars.HIVEUSEHYBRIDGRACEHASHJOIN))) + || (conf.getBoolVar(HiveConf.ConfVars.HIVE_TEZ_ENABLE_MEMORY_MANAGER))) { physicalCtx = new MemoryDecider().resolve(physicalCtx); } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java index bc67e5a..575f67c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java @@ -30,7 +30,7 @@ protected Statistics statistics; protected transient OpTraits opTraits; protected transient Map opProps; - protected long memNeeded = 0; + protected long memNeeded = -1; @Override @Explain(skipHeader = true, displayName = "Statistics", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/MergeJoinWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/MergeJoinWork.java index b088326..4435c77 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/MergeJoinWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MergeJoinWork.java @@ -75,20 +75,20 @@ public void addMergedWork(BaseWork work, BaseWork connectWork, if (connectWork != null) { this.mergeWorkList.add(connectWork); + setMemoryNeeded(getMemoryNeeded() + connectWork.getMemoryNeeded()); if ((connectWork instanceof ReduceWork) && (bigTableWork != null)) { /* * For tez to route data from an up-stream vertex correctly to the following vertex, the * output name in the reduce sink needs to be setup appropriately. In the case of reduce * side merge work, we need to ensure that the parent work that provides data to this merge * work is setup to point to the right vertex name - the main work name. 
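Two related changes meet here: AbstractOperatorDesc now defaults memNeeded to -1 (so "never estimated" is distinguishable from a genuine zero-byte estimate), and MergeJoinWork sums the memory needs of merged work items. A small sketch of how a consumer might combine the two conventions; the helper is hypothetical and not code from the patch, which sums the values directly:

```java
// Sketch: sum per-work memory estimates where -1 means "no estimate yet".
// With the old default of 0, an unestimated work item silently counted as
// zero bytes; the -1 sentinel lets callers notice and skip (or flag) it.
public final class MemoryNeededSketch {
  static long accumulate(long current, long incoming) {
    if (incoming < 0) {
      return current;            // no estimate for this work item; leave the sum alone
    }
    return current < 0 ? incoming : current + incoming;
  }

  public static void main(String[] args) {
    long sum = accumulate(accumulate(-1, 2048), -1);
    System.out.println(sum);     // 2048: the unestimated item did not pollute the sum
  }
}
```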
- * + * In this case, if the big table work has already been created, we can hook up the merge * work items for the small table correctly. */ setReduceSinkOutputName(connectWork, leafOperatorToFollowingWork, bigTableWork.getName()); } } - if (work != null) { /* * Same reason as above. This is the case when we have the main work item after the merge work @@ -99,6 +99,7 @@ setReduceSinkOutputName(mergeWork, leafOperatorToFollowingWork, work.getName()); } } + setMemoryNeeded(work.getMemoryNeeded() + getMemoryNeeded()); } } @@ -164,7 +165,8 @@ public void setLlapMode(boolean llapMode) { public boolean getLlapMode() { return getMainWork().getLlapMode(); } - + + @Override public void addDummyOp(HashTableDummyOperator dummyOp) { getMainWork().addDummyOp(dummyOp); } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/TezEdgeProperty.java ql/src/java/org/apache/hadoop/hive/ql/plan/TezEdgeProperty.java index a3aa12f..aada4d4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/TezEdgeProperty.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/TezEdgeProperty.java @@ -18,7 +18,12 @@ package org.apache.hadoop.hive.ql.plan; +import java.io.IOException; + import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.tez.DagUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class TezEdgeProperty { @@ -30,25 +35,31 @@ CUSTOM_SIMPLE_EDGE, } - private HiveConf hiveConf; - private EdgeType edgeType; - private int numBuckets; + private final HiveConf hiveConf; + private final EdgeType edgeType; + private final int numBuckets; private boolean isAutoReduce; private int minReducer; private int maxReducer; private long inputSizePerReducer; + private long dataFlowSize; + private float shuffleMemFraction; + private long sortBufferMemRequired; + + private static final Logger LOG = LoggerFactory.getLogger(TezEdgeProperty.class.getName()); + - public TezEdgeProperty(HiveConf hiveConf, EdgeType edgeType, - int buckets) { + public TezEdgeProperty(HiveConf hiveConf, EdgeType edgeType, int buckets, long dataFlowSize) { this.hiveConf = hiveConf; this.edgeType = edgeType; this.numBuckets = buckets; + this.dataFlowSize = dataFlowSize; } public TezEdgeProperty(HiveConf hiveConf, EdgeType edgeType, boolean isAutoReduce, - int minReducer, int maxReducer, long bytesPerReducer) { - this(hiveConf, edgeType, -1); + int minReducer, int maxReducer, long bytesPerReducer, long dataFlowSize) { + this(hiveConf, edgeType, -1, dataFlowSize); this.minReducer = minReducer; this.maxReducer = maxReducer; this.isAutoReduce = isAutoReduce; @@ -56,7 +67,11 @@ public TezEdgeProperty(HiveConf hiveConf, EdgeType edgeType, boolean isAutoReduc } public TezEdgeProperty(EdgeType edgeType) { - this(null, edgeType, -1); + this(edgeType, 0); + } + + public TezEdgeProperty(EdgeType edgeType, long dataFlowSize) { + this(null, edgeType, -1, dataFlowSize); } public EdgeType getEdgeType() { @@ -86,4 +101,31 @@ public int getMaxReducer() { public long getInputSizePerReducer() { return inputSizePerReducer; } + + public long getDataFlowSize() { + return dataFlowSize; + } + + public void setDataFlowSize(long dataFlowSize) { + this.dataFlowSize = dataFlowSize; + } + + public double getShuffleMemFraction() { + return shuffleMemFraction; + } + + public void setShuffleMemFraction(float shuffleMemFraction) { + this.shuffleMemFraction = shuffleMemFraction; + } + + public long getSortBufferMemRequiredMB() { + long mbSize = sortBufferMemRequired / (1024 * 1024); +
mbSize = (mbSize > 0) ? mbSize : 1; + + return mbSize; + } + + public void setSortBufferMemRequired(long sortBufferMemRequired) { + this.sortBufferMemRequired = sortBufferMemRequired; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java index 8b82c66..2468b58 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java @@ -19,7 +19,6 @@ package org.apache.hadoop.hive.ql.plan; import java.io.Serializable; -import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashMap; @@ -32,8 +31,6 @@ import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.plan.TezEdgeProperty.EdgeType; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.hive.ql.plan.Explain.Level; @@ -65,8 +62,6 @@ public static boolean isCustomInputType(VertexType vertex) { } } - private static transient final Logger LOG = LoggerFactory.getLogger(TezWork.class); - private static int counter; private final String name; private final Set roots = new HashSet(); @@ -181,6 +176,7 @@ public void disconnect(BaseWork a, BaseWork b) { if (getChildren(a).isEmpty()) { leaves.add(a); } + edgeProperties.remove(new ImmutablePair(a, b)); } /** @@ -234,6 +230,7 @@ public void remove(BaseWork work) { if (invertedWorkGraph.get(w).size() == 0) { roots.add(w); } + edgeProperties.remove(new ImmutablePair(work, w)); } for (BaseWork w: parents) { @@ -241,6 +238,7 @@ public void remove(BaseWork work) { if (workGraph.get(w).size() == 0) { leaves.add(w); } + edgeProperties.remove(new ImmutablePair(w, work)); } roots.remove(work); @@ -250,6 +248,7 @@ public void remove(BaseWork work) { invertedWorkGraph.remove(work); } + @SuppressWarnings({ "rawtypes", "unchecked" }) public EdgeType getEdgeType(BaseWork a, BaseWork b) { return edgeProperties.get(new ImmutablePair(a,b)).getEdgeType(); } @@ -257,6 +256,7 @@ public EdgeType getEdgeType(BaseWork a, BaseWork b) { /** * returns the edge type connecting work a and b */ + @SuppressWarnings({ "rawtypes", "unchecked" }) public TezEdgeProperty getEdgeProperty(BaseWork a, BaseWork b) { return edgeProperties.get(new ImmutablePair(a,b)); } @@ -288,6 +288,7 @@ public int compareTo(Dependency o) { } } + @SuppressWarnings({ "unchecked", "rawtypes" }) @Explain(displayName = "Edges", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public Map> getDependencyMap() { Map> result = new LinkedHashMap>(); @@ -351,6 +352,7 @@ public int compareTo(Dependency o) { * to be added prior to calling connect. 
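The recurring edgeProperties.remove(new ImmutablePair(...)) additions in TezWork above all close one leak: disconnect() and remove() used to drop graph edges while leaving their TezEdgeProperty entries behind, so a pair of work items that was disconnected and later reconnected could observe a stale property. Below is the invariant as a compact sketch, with generic types standing in for BaseWork and TezEdgeProperty:

```java
import java.util.AbstractMap.SimpleImmutableEntry;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

// Sketch: an edge-property map kept in lock-step with the adjacency sets,
// mirroring what the TezWork.disconnect()/remove() changes now enforce.
public final class EdgeGraphSketch<N, P> {
  private final Map<N, Set<N>> children = new HashMap<>();
  private final Map<Map.Entry<N, N>, P> edgeProps = new HashMap<>();

  public void connect(N a, N b, P prop) {
    children.computeIfAbsent(a, k -> new HashSet<>()).add(b);
    edgeProps.put(new SimpleImmutableEntry<>(a, b), prop); // pair key, like ImmutablePair
  }

  public void disconnect(N a, N b) {
    Set<N> c = children.get(a);
    if (c != null) {
      c.remove(b);
    }
    edgeProps.remove(new SimpleImmutableEntry<>(a, b));    // the step the patch adds
  }

  public P edgeProperty(N a, N b) {
    return edgeProps.get(new SimpleImmutableEntry<>(a, b));
  }
}
```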
* @param */ + @SuppressWarnings({ "rawtypes", "unchecked" }) public void connect(BaseWork a, BaseWork b, TezEdgeProperty edgeProp) { workGraph.get(a).add(b); diff --git ql/src/test/results/clientpositive/llap/tez_join.q.out ql/src/test/results/clientpositive/llap/tez_join.q.out index 41e2156..2dd8b74 100644 --- ql/src/test/results/clientpositive/llap/tez_join.q.out +++ ql/src/test/results/clientpositive/llap/tez_join.q.out @@ -91,6 +91,8 @@ STAGE PLANS: expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Dummy Store + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Execution mode: llap Reduce Operator Tree: Select Operator diff --git ql/src/test/results/clientpositive/llap/tez_smb_1.q.out ql/src/test/results/clientpositive/llap/tez_smb_1.q.out index 747bfe7..bd09895 100644 --- ql/src/test/results/clientpositive/llap/tez_smb_1.q.out +++ ql/src/test/results/clientpositive/llap/tez_smb_1.q.out @@ -137,6 +137,8 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Dummy Store + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan alias: s1 @@ -541,6 +543,8 @@ STAGE PLANS: expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Dummy Store + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Execution mode: llap Reduce Operator Tree: Select Operator diff --git ql/src/test/results/clientpositive/llap/tez_smb_main.q.out ql/src/test/results/clientpositive/llap/tez_smb_main.q.out index f15339b..bcb58e6 100644 --- ql/src/test/results/clientpositive/llap/tez_smb_main.q.out +++ ql/src/test/results/clientpositive/llap/tez_smb_main.q.out @@ -590,6 +590,8 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Dummy Store + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan alias: b @@ -832,6 +834,8 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Dummy Store + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan alias: s1 diff --git ql/src/test/results/clientpositive/tez/auto_sortmerge_join_5.q.out ql/src/test/results/clientpositive/tez/auto_sortmerge_join_5.q.out index d4e2d6b..1904bc0 100644 --- ql/src/test/results/clientpositive/tez/auto_sortmerge_join_5.q.out +++ ql/src/test/results/clientpositive/tez/auto_sortmerge_join_5.q.out @@ -133,6 +133,8 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE + Dummy Store + Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -370,6 +372,8 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE + Dummy Store + Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE Path -> Alias: #### A 
masked pattern was here #### Path -> Partition: diff --git ql/src/test/results/clientpositive/tez/auto_sortmerge_join_6.q.out ql/src/test/results/clientpositive/tez/auto_sortmerge_join_6.q.out index 2956bf8..40c7bd1 100644 --- ql/src/test/results/clientpositive/tez/auto_sortmerge_join_6.q.out +++ ql/src/test/results/clientpositive/tez/auto_sortmerge_join_6.q.out @@ -112,6 +112,8 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Dummy Store + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan alias: a @@ -239,6 +241,8 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Dummy Store + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan alias: a @@ -366,6 +370,8 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Dummy Store + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan alias: a @@ -489,6 +495,8 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Dummy Store + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan alias: a @@ -611,6 +619,8 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Dummy Store + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan alias: c @@ -622,6 +632,8 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Dummy Store + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan alias: a @@ -715,6 +727,8 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Dummy Store + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan alias: a @@ -838,6 +852,8 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Dummy Store + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan alias: a @@ -961,6 +977,8 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Dummy Store + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan alias: a @@ -1083,6 +1101,8 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Dummy Store + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan alias: c @@ -1094,6 +1114,8 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 500 
Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Dummy Store + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan alias: a @@ -1187,6 +1209,8 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Dummy Store + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan alias: a diff --git ql/src/test/results/clientpositive/tez/explainuser_2.q.out ql/src/test/results/clientpositive/tez/explainuser_2.q.out index 61a9580..badc53a 100644 --- ql/src/test/results/clientpositive/tez/explainuser_2.q.out +++ ql/src/test/results/clientpositive/tez/explainuser_2.q.out @@ -2621,15 +2621,17 @@ Stage-0 | outputColumnNames:["_col0","_col1"] | Statistics:Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE | - |<-Select Operator [SEL_5] - | outputColumnNames:["_col0"] + |<-Dummy Store [OP_18] | Statistics:Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_16] - | predicate:key is not null (type: boolean) + | Select Operator [SEL_5] + | outputColumnNames:["_col0"] | Statistics:Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_3] - | alias:s1 + | Filter Operator [FIL_16] + | predicate:key is not null (type: boolean) | Statistics:Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_3] + | alias:s1 + | Statistics:Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE |<-Select Operator [SEL_2] outputColumnNames:["_col0","_col1"] Statistics:Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE @@ -2678,15 +2680,17 @@ Stage-0 | | outputColumnNames:["_col0","_col1"] | | Statistics:Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE | | - | |<-Select Operator [SEL_5] - | | outputColumnNames:["_col0"] + | |<-Dummy Store [OP_30] | | Statistics:Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_27] - | | predicate:key is not null (type: boolean) + | | Select Operator [SEL_5] + | | outputColumnNames:["_col0"] | | Statistics:Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_3] - | | alias:s1 + | | Filter Operator [FIL_27] + | | predicate:key is not null (type: boolean) | | Statistics:Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_3] + | | alias:s1 + | | Statistics:Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE | |<-Select Operator [SEL_2] | outputColumnNames:["_col0","_col1"] | Statistics:Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE @@ -2735,15 +2739,17 @@ Stage-0 | outputColumnNames:["_col0","_col1"] | Statistics:Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE | - |<-Select Operator [SEL_5] - | outputColumnNames:["_col0"] + |<-Dummy Store [OP_18] | Statistics:Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_16] - | predicate:key is not null (type: boolean) + | Select Operator [SEL_5] + | outputColumnNames:["_col0"] | Statistics:Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_3] - | alias:s3 + | Filter Operator [FIL_16] + | predicate:key is not null (type: boolean) | Statistics:Num rows: 242 Data size: 2566 Basic stats: 
COMPLETE Column stats: NONE + | TableScan [TS_3] + | alias:s3 + | Statistics:Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE |<-Select Operator [SEL_2] outputColumnNames:["_col0","_col1"] Statistics:Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE @@ -2792,15 +2798,17 @@ Stage-0 | | outputColumnNames:["_col0","_col1"] | | Statistics:Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE | | - | |<-Select Operator [SEL_5] - | | outputColumnNames:["_col0"] + | |<-Dummy Store [OP_30] | | Statistics:Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_27] - | | predicate:key is not null (type: boolean) + | | Select Operator [SEL_5] + | | outputColumnNames:["_col0"] | | Statistics:Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_3] - | | alias:s3 + | | Filter Operator [FIL_27] + | | predicate:key is not null (type: boolean) | | Statistics:Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_3] + | | alias:s3 + | | Statistics:Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE | |<-Select Operator [SEL_2] | outputColumnNames:["_col0","_col1"] | Statistics:Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE @@ -2900,15 +2908,17 @@ Stage-0 | | outputColumnNames:["_col0"] | | Statistics:Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE | | - | |<-Select Operator [SEL_5] - | | outputColumnNames:["_col0"] + | |<-Dummy Store [OP_42] | | Statistics:Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_38] - | | predicate:key is not null (type: boolean) + | | Select Operator [SEL_5] + | | outputColumnNames:["_col0"] | | Statistics:Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_3] - | | alias:s1 + | | Filter Operator [FIL_38] + | | predicate:key is not null (type: boolean) | | Statistics:Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_3] + | | alias:s1 + | | Statistics:Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE | |<-Select Operator [SEL_2] | outputColumnNames:["_col0"] | Statistics:Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE @@ -3035,15 +3045,17 @@ Stage-0 | | outputColumnNames:["_col0","_col1"] | | Statistics:Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE | | - | |<-Select Operator [SEL_5] - | | outputColumnNames:["_col0"] + | |<-Dummy Store [OP_55] | | Statistics:Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_50] - | | predicate:key is not null (type: boolean) + | | Select Operator [SEL_5] + | | outputColumnNames:["_col0"] | | Statistics:Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_3] - | | alias:s1 + | | Filter Operator [FIL_50] + | | predicate:key is not null (type: boolean) | | Statistics:Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_3] + | | alias:s1 + | | Statistics:Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE | |<-Select Operator [SEL_2] | outputColumnNames:["_col0","_col1"] | Statistics:Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/tez/explainuser_3.q.out ql/src/test/results/clientpositive/tez/explainuser_3.q.out 
index 33d9457..aade0c4 100644 --- ql/src/test/results/clientpositive/tez/explainuser_3.q.out +++ ql/src/test/results/clientpositive/tez/explainuser_3.q.out @@ -32,23 +32,23 @@ Stage-0 Reducer 2 vectorized File Output Operator [FS_8] compressed:false - Statistics:Num rows: 10 Data size: 1704 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 10 Data size: 1714 Basic stats: COMPLETE Column stats: NONE table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"} Select Operator [OP_7] | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 10 Data size: 1704 Basic stats: COMPLETE Column stats: NONE + | Statistics:Num rows: 10 Data size: 1714 Basic stats: COMPLETE Column stats: NONE |<-Map 1 [SIMPLE_EDGE] vectorized Reduce Output Operator [RS_6] key expressions:_col0 (type: int), _col1 (type: string) sort order:++ - Statistics:Num rows: 10 Data size: 1704 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 10 Data size: 1714 Basic stats: COMPLETE Column stats: NONE Select Operator [OP_5] outputColumnNames:["_col0","_col1"] - Statistics:Num rows: 10 Data size: 1704 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 10 Data size: 1714 Basic stats: COMPLETE Column stats: NONE TableScan [TS_0] ACID table:true alias:acid_vectorized - Statistics:Num rows: 10 Data size: 1704 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 10 Data size: 1714 Basic stats: COMPLETE Column stats: NONE PREHOOK: query: explain select key, value FROM srcpart LATERAL VIEW explode(array(1,2,3)) myTable AS myCol diff --git ql/src/test/results/clientpositive/tez/tez_join.q.out ql/src/test/results/clientpositive/tez/tez_join.q.out index bbf196c..388935e 100644 --- ql/src/test/results/clientpositive/tez/tez_join.q.out +++ ql/src/test/results/clientpositive/tez/tez_join.q.out @@ -89,6 +89,8 @@ STAGE PLANS: expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Dummy Store + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) diff --git ql/src/test/results/clientpositive/tez/tez_smb_1.q.out ql/src/test/results/clientpositive/tez/tez_smb_1.q.out index 5214cae..942394c 100644 --- ql/src/test/results/clientpositive/tez/tez_smb_1.q.out +++ ql/src/test/results/clientpositive/tez/tez_smb_1.q.out @@ -137,6 +137,8 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Dummy Store + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan alias: s1 @@ -527,6 +529,8 @@ STAGE PLANS: expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Dummy Store + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) diff --git ql/src/test/results/clientpositive/tez/tez_smb_empty.q.out ql/src/test/results/clientpositive/tez/tez_smb_empty.q.out index 8c9ab2e..6540e68 100644 --- ql/src/test/results/clientpositive/tez/tez_smb_empty.q.out +++ 
ql/src/test/results/clientpositive/tez/tez_smb_empty.q.out @@ -149,6 +149,8 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Dummy Store + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Map Operator Tree: TableScan alias: s1 @@ -234,6 +236,8 @@ STAGE PLANS: expressions: key (type: int), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Dummy Store + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Map Operator Tree: TableScan alias: s1 @@ -544,6 +548,8 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Dummy Store + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan alias: s3 @@ -555,6 +561,8 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Dummy Store + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Map Operator Tree: TableScan alias: s1 @@ -644,6 +652,8 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Dummy Store + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Map Operator Tree: TableScan alias: s1 @@ -655,6 +665,8 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Dummy Store + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan alias: s1 @@ -747,6 +759,8 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Dummy Store + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Map Operator Tree: TableScan alias: s1 @@ -833,6 +847,8 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Dummy Store + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan alias: s1 diff --git ql/src/test/results/clientpositive/tez/tez_smb_main.q.out ql/src/test/results/clientpositive/tez/tez_smb_main.q.out index b711089..2451bcb 100644 --- ql/src/test/results/clientpositive/tez/tez_smb_main.q.out +++ ql/src/test/results/clientpositive/tez/tez_smb_main.q.out @@ -578,6 +578,8 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Dummy Store + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan alias: b @@ -813,6 +815,8 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Dummy Store + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan alias: s1 diff --git ql/src/test/results/clientpositive/tez/vector_auto_smb_mapjoin_14.q.out 
ql/src/test/results/clientpositive/tez/vector_auto_smb_mapjoin_14.q.out
index 9b62353..977e621 100644
--- ql/src/test/results/clientpositive/tez/vector_auto_smb_mapjoin_14.q.out
+++ ql/src/test/results/clientpositive/tez/vector_auto_smb_mapjoin_14.q.out
@@ -86,15 +86,17 @@ Stage-0
 | keys:{"0":"_col0 (type: int)","1":"_col0 (type: int)"}
 | Statistics:Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
 |
- |<-Select Operator [SEL_5]
- |   outputColumnNames:["_col0"]
+ |<-Dummy Store [OP_22]
 |   Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
- |   Filter Operator [FIL_20]
- |   predicate:key is not null (type: boolean)
+ |   Select Operator [SEL_5]
+ |   outputColumnNames:["_col0"]
 |   Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
- |   TableScan [TS_3]
- |   alias:b
+ |   Filter Operator [FIL_20]
+ |   predicate:key is not null (type: boolean)
 |   Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ |   TableScan [TS_3]
+ |   alias:b
+ |   Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
 |<-Select Operator [SEL_2]
    outputColumnNames:["_col0"]
    Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
@@ -192,15 +194,17 @@ Stage-0
 | outputColumnNames:["_col0"]
 | Statistics:Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
 |
- |<-Select Operator [SEL_5]
- |   outputColumnNames:["_col0"]
+ |<-Dummy Store [OP_27]
 |   Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
- |   Filter Operator [FIL_25]
- |   predicate:key is not null (type: boolean)
+ |   Select Operator [SEL_5]
+ |   outputColumnNames:["_col0"]
 |   Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
- |   TableScan [TS_3]
- |   alias:b
+ |   Filter Operator [FIL_25]
+ |   predicate:key is not null (type: boolean)
 |   Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ |   TableScan [TS_3]
+ |   alias:b
+ |   Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
 |<-Select Operator [SEL_2]
    outputColumnNames:["_col0"]
    Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
@@ -328,15 +332,17 @@ Stage-0
 | | outputColumnNames:["_col0"]
 | | Statistics:Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
 | |
- | |<-Select Operator [SEL_5]
- | |   outputColumnNames:["_col0"]
+ | |<-Dummy Store [OP_52]
 | |   Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
- | |   Filter Operator [FIL_48]
- | |   predicate:key is not null (type: boolean)
+ | |   Select Operator [SEL_5]
+ | |   outputColumnNames:["_col0"]
 | |   Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
- | |   TableScan [TS_3]
- | |   alias:b
+ | |   Filter Operator [FIL_48]
+ | |   predicate:key is not null (type: boolean)
 | |   Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ | |   TableScan [TS_3]
+ | |   alias:b
+ | |   Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
 | |<-Select Operator [SEL_2]
 |    outputColumnNames:["_col0"]
 |    Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
@@ -376,15 +382,17 @@ Stage-0
 | outputColumnNames:["_col0"]
 | Statistics:Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
 |
- |<-Select Operator [SEL_21]
- |   outputColumnNames:["_col0"]
+ |<-Dummy Store [OP_54]
 |   Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
- |   Filter Operator [FIL_50]
- |   predicate:key is not null (type: boolean)
+ |   Select Operator [SEL_21]
+ |   outputColumnNames:["_col0"]
 |   Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
- |   TableScan [TS_19]
- |   alias:b
+ |   Filter Operator [FIL_50]
+ |   predicate:key is not null (type: boolean)
 |   Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ |   TableScan [TS_19]
+ |   alias:b
+ |   Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
 |<-Select Operator [SEL_18]
    outputColumnNames:["_col0"]
    Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
@@ -489,15 +497,17 @@ Stage-0
 | keys:{"0":"_col0 (type: int)","1":"_col0 (type: int)"}
 | Statistics:Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: NONE
 |
- |<-Select Operator [SEL_5]
- |   outputColumnNames:["_col0"]
+ |<-Dummy Store [OP_22]
 |   Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
- |   Filter Operator [FIL_20]
- |   predicate:(key < 6) (type: boolean)
+ |   Select Operator [SEL_5]
+ |   outputColumnNames:["_col0"]
 |   Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
- |   TableScan [TS_3]
- |   alias:a
- |   Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ |   Filter Operator [FIL_20]
+ |   predicate:(key < 6) (type: boolean)
+ |   Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ |   TableScan [TS_3]
+ |   alias:a
+ |   Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
 |<-Select Operator [SEL_2]
    outputColumnNames:["_col0"]
    Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
@@ -587,15 +597,17 @@ Stage-0
 | keys:{"0":"_col0 (type: int)","1":"_col0 (type: int)"}
 | Statistics:Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: NONE
 |
- |<-Select Operator [SEL_5]
- |   outputColumnNames:["_col0"]
+ |<-Dummy Store [OP_22]
 |   Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
- |   Filter Operator [FIL_20]
- |   predicate:(((key < 8) and (key < 6)) and key is not null) (type: boolean)
+ |   Select Operator [SEL_5]
+ |   outputColumnNames:["_col0"]
 |   Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
- |   TableScan [TS_3]
- |   alias:b
- |   Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ |   Filter Operator [FIL_20]
+ |   predicate:(((key < 8) and (key < 6)) and key is not null) (type: boolean)
+ |   Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
+ |   TableScan [TS_3]
+ |   alias:b
+ |   Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
 |<-Select Operator [SEL_2]
    outputColumnNames:["_col0"]
    Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
@@ -709,15 +721,17 @@ Stage-0
 | keys:{"0":"_col0 (type: int)","1":"_col0 (type: int)"}
 | Statistics:Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: NONE
 |
- |<-Select Operator [SEL_5]
- |   outputColumnNames:["_col0"]
+ |<-Dummy Store [OP_22]
 |   Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
- |   Filter Operator [FIL_20]
- |   predicate:(((key < 8) and (key < 6)) and key is not null) (type: boolean)
+ |   Select Operator [SEL_5]
+ |   outputColumnNames:["_col0"]
 |   Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
- |   TableScan [TS_3]
- |   alias:a
- |   Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ |   Filter Operator [FIL_20]
+ |   predicate:(((key < 8) and (key < 6)) and key is not null) (type: boolean)
+ |   Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
+ |   TableScan [TS_3]
+ |   alias:a
+ |   Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
 |<-Select Operator [SEL_2]
    outputColumnNames:["_col0"]
    Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
@@ -821,15 +835,17 @@ Stage-0
 | keys:{"0":"_col0 (type: int)","1":"_col0 (type: int)"}
 | Statistics:Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: NONE
 |
- |<-Select Operator [SEL_5]
- |   outputColumnNames:["_col0"]
+ |<-Dummy Store [OP_22]
 |   Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
- |   Filter Operator [FIL_20]
- |   predicate:(key < 8) (type: boolean)
+ |   Select Operator [SEL_5]
+ |   outputColumnNames:["_col0"]
 |   Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
- |   TableScan [TS_3]
- |   alias:a
- |   Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ |   Filter Operator [FIL_20]
+ |   predicate:(key < 8) (type: boolean)
+ |   Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ |   TableScan [TS_3]
+ |   alias:a
+ |   Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
 |<-Select Operator [SEL_2]
    outputColumnNames:["_col0"]
    Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
@@ -1011,15 +1027,17 @@ Stage-0
 | keys:{"0":"_col0 (type: int)","1":"_col0 (type: int)"}
 | Statistics:Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: NONE
 |
- |<-Select Operator [SEL_5]
- |   outputColumnNames:["_col0"]
+ |<-Dummy Store [OP_22]
 |   Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
- |   Filter Operator [FIL_20]
- |   predicate:(key < 6) (type: boolean)
+ |   Select Operator [SEL_5]
+ |   outputColumnNames:["_col0"]
 |   Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
- |   TableScan [TS_3]
- |   alias:a
- |   Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ |   Filter Operator [FIL_20]
+ |   predicate:(key < 6) (type: boolean)
+ |   Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ |   TableScan [TS_3]
+ |   alias:a
+ |   Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
 |<-Select Operator [SEL_2]
    outputColumnNames:["_col0"]
    Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
@@ -1101,25 +1119,29 @@ Stage-0
 | keys:{"0":"_col0 (type: int)","1":"_col0 (type: int)","2":"_col0 (type: int)"}
 | Statistics:Num rows: 6 Data size: 613 Basic stats: COMPLETE Column stats: NONE
 |
- |<-Select Operator [SEL_5]
- |   outputColumnNames:["_col0"]
+ |<-Dummy Store [OP_32]
 |   Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
- |   Filter Operator [FIL_29]
- |   predicate:(key < 6) (type: boolean)
+ |   Select Operator [SEL_5]
+ |   outputColumnNames:["_col0"]
 |   Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
- |   TableScan [TS_3]
- |   alias:a
- |   Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ |   Filter Operator [FIL_29]
+ |   predicate:(key < 6) (type: boolean)
+ |   Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ |   TableScan [TS_3]
+ |   alias:a
+ |   Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
 |
- |<-Select Operator [SEL_8]
- |   outputColumnNames:["_col0"]
+ |<-Dummy Store [OP_33]
 |   Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
- |   Filter Operator [FIL_30]
- |   predicate:(key < 6) (type: boolean)
+ |   Select Operator [SEL_8]
+ |   outputColumnNames:["_col0"]
 |   Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
- |   TableScan [TS_6]
- |   alias:a
- |   Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ |   Filter Operator [FIL_30]
+ |   predicate:(key < 6) (type: boolean)
+ |   Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
+ |   TableScan [TS_6]
+ |   alias:a
+ |   Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
 |<-Select Operator [SEL_2]
    outputColumnNames:["_col0"]
    Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE
@@ -1217,15 +1239,17 @@ Stage-0
 | keys:{"0":"_col0 (type: int)","1":"_col0 (type: int)"}
 | Statistics:Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: NONE
 |
- |<-Select Operator [SEL_5]
- |   outputColumnNames:["_col0"]
+ |<-Dummy Store [OP_22]
 |   Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
- |   Filter Operator [FIL_20]
- |   predicate:(((key < 8) and (key < 6)) and key is not null) (type: boolean)
+ |   Select Operator [SEL_5]
+ |   outputColumnNames:["_col0"]
 |   Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
- |   TableScan [TS_3]
- |   alias:b
- |   Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ |   Filter Operator [FIL_20]
+ |   predicate:(((key < 8) and (key < 6)) and key is not null) (type: boolean)
+ |   Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
+ |   TableScan [TS_3]
+ |   alias:b
+ |   Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
 |<-Select Operator [SEL_2]
    outputColumnNames:["_col0"]
    Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE
@@ -1328,12 +1352,14 @@ Stage-4
 | outputColumnNames:["_col0","_col1","_col6"]
 | Statistics:Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
 |
- |<-Filter Operator [FIL_15]
- |   predicate:key is not null (type: boolean)
+ |<-Dummy Store [OP_17]
 |   Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
- |   TableScan [TS_1]
- |   alias:b
+ |   Filter Operator [FIL_15]
+ |   predicate:key is not null (type: boolean)
 |   Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ |   TableScan [TS_1]
+ |   alias:b
+ |   Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
 |<-Filter Operator [FIL_14]
    predicate:key is not null (type: boolean)
    Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
@@ -1508,12 +1534,14 @@ Stage-4
 | outputColumnNames:["_col0","_col1"]
 | Statistics:Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE
 |
- |<-Filter Operator [FIL_20]
- |   predicate:key is not null (type: boolean)
+ |<-Dummy Store [OP_22]
 |   Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
- |   TableScan [TS_1]
- |   alias:b
+ |   Filter Operator [FIL_20]
+ |   predicate:key is not null (type: boolean)
 |   Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
+ |   TableScan [TS_1]
+ |   alias:b
+ |   Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE
 |<-Filter Operator [FIL_19]
    predicate:key is not null (type: boolean)
    Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE