diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties
index 00c9f4d..8e0fd37 100644
--- itests/src/test/resources/testconfiguration.properties
+++ itests/src/test/resources/testconfiguration.properties
@@ -455,6 +455,7 @@ spark.query.files=add_part_multiple.q, \
   auto_sortmerge_join_13.q, \
   auto_sortmerge_join_14.q, \
   auto_sortmerge_join_15.q, \
+  auto_sortmerge_join_16.q, \
   auto_sortmerge_join_2.q, \
   auto_sortmerge_join_3.q, \
   auto_sortmerge_join_4.q, \
diff --git ql/src/java/org/apache/hadoop/hive/ql/lib/TypeRule.java ql/src/java/org/apache/hadoop/hive/ql/lib/TypeRule.java
new file mode 100644
index 0000000..b55805c
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/lib/TypeRule.java
@@ -0,0 +1,51 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.lib;
+
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+
+import java.util.Stack;
+
+/**
+ * Rule that matches a particular type of node.
+ */
+public class TypeRule implements Rule {
+
+  private Class<?> nodeClass;
+
+  public TypeRule(Class<?> nodeClass) {
+    this.nodeClass = nodeClass;
+  }
+
+  @Override
+  public int cost(Stack<Node> stack) throws SemanticException {
+    if (stack == null) {
+      return -1;
+    }
+    if (nodeClass.isInstance(stack.peek())) {
+      return 1;
+    }
+    return -1;
+  }
+
+  @Override
+  public String getName() {
+    return nodeClass.getName();
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
index ae1d1ab..c1e4a9a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
@@ -96,7 +96,7 @@ public void initialize(HiveConf hiveConf) {
     // If hive.optimize.bucketmapjoin.sortedmerge is set, add both
     // BucketMapJoinOptimizer and SortedMergeBucketMapJoinOptimizer
     if ((HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTSORTMERGEBUCKETMAPJOIN))
-        && !isTezExecEngine && !isSparkExecEngine) {
+        && !isTezExecEngine) {
      if (!bucketMapJoinOptimizer) {
       // No need to add BucketMapJoinOptimizer twice
       transformations.add(new BucketMapJoinOptimizer());
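Unlike RuleRegExp, which matches a pattern against the operator names on the walker's stack, TypeRule fires whenever the node on top of the stack is an instance of the given class. A minimal sketch of how such a rule is wired into a dispatcher, mirroring the registration that appears in SparkCompiler's second pass later in this patch (opRules, procCtx and topNodes are that method's local bookkeeping):

    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
    // Dispatch the SMB-join processor on every SMBMapJoinOperator node, whatever its name.
    opRules.put(new TypeRule(SMBMapJoinOperator.class),
        SparkSortMergeJoinFactory.getTableScanMapJoin());
    Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
    GraphWalker ogw = new GenSparkWorkWalker(disp, procCtx);
    ogw.startWalking(topNodes, null);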
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkSortMergeJoinFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkSortMergeJoinFactory.java
new file mode 100644
index 0000000..42e6b8b
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkSortMergeJoinFactory.java
@@ -0,0 +1,168 @@
+/**
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.hadoop.hive.ql.optimizer.spark;
+
+import org.apache.hadoop.hive.ql.exec.*;
+import org.apache.hadoop.hive.ql.exec.spark.SparkTask;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.parse.spark.GenSparkProcContext;
+import org.apache.hadoop.hive.ql.plan.*;
+
+import java.util.List;
+import java.util.Map;
+import java.util.Stack;
+
+/**
+* Operator factory for Spark SMB join processing.
+*/
+public final class SparkSortMergeJoinFactory {
+
+  private SparkSortMergeJoinFactory() {
+    // prevent instantiation
+  }
+
+  public static int getPositionParent(SMBMapJoinOperator op,
+      Stack<Node> stack) {
+    int size = stack.size();
+    assert size >= 2 && stack.get(size - 1) == op;
+    Operator<? extends OperatorDesc> parent =
+        (Operator<? extends OperatorDesc>) stack.get(size - 2);
+    List<Operator<? extends OperatorDesc>> parOp = op.getParentOperators();
+    int pos = parOp.indexOf(parent);
+    return pos;
+  }
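The processor below uses this helper to tell which parent branch of the SMB join the walker arrived on; any position other than the big table's marks the current branch as a small (local) table. The call pattern, as used in process():

    int pos = getPositionParent(mapJoin, stack);
    // Every branch except the big-table position is a small, locally-read table.
    boolean local = pos != mapJoin.getConf().getPosBigTable();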
+  /**
+   * SortMergeMapJoin processor; its input is an SMBJoinOp that is part of a MapWork:
+   *
+   *  MapWork:
+   *
+   *   (Big)   (Small)  (Small)
+   *    TS       TS       TS
+   *     \       |       /
+   *      \      DS     DS
+   *       \     |     /
+   *        SMBJoinOP
+   *
+   * 1. Initializes the MapWork's aliasToWork, pointing to the big table's TS.
+   * 2. Adds the bucketing information to the MapWork.
+   * 3. Adds local work to the MapWork, with the local work's aliasToWork pointing to the small tables' TS.
+   */
+  private static class SortMergeJoinProcessor implements NodeProcessor {
+
+    public static void setupBucketMapJoinInfo(MapWork plan, SMBMapJoinOperator currMapJoinOp) {
+      if (currMapJoinOp != null) {
+        Map<String, Map<String, List<String>>> aliasBucketFileNameMapping =
+            currMapJoinOp.getConf().getAliasBucketFileNameMapping();
+        if (aliasBucketFileNameMapping != null) {
+          MapredLocalWork localPlan = plan.getMapLocalWork();
+          if (localPlan == null) {
+            localPlan = currMapJoinOp.getConf().getLocalWork();
+          } else {
+            // The local plan is not null; merge it into the SMBMapJoinOperator's local work.
+            MapredLocalWork smbLocalWork = currMapJoinOp.getConf().getLocalWork();
+            if (smbLocalWork != null) {
+              localPlan.getAliasToFetchWork().putAll(smbLocalWork.getAliasToFetchWork());
+              localPlan.getAliasToWork().putAll(smbLocalWork.getAliasToWork());
+            }
+          }
+
+          if (localPlan == null) {
+            return;
+          }
+          plan.setMapLocalWork(null);
+          currMapJoinOp.getConf().setLocalWork(localPlan);
+
+          BucketMapJoinContext bucketMJCxt = new BucketMapJoinContext();
+          localPlan.setBucketMapjoinContext(bucketMJCxt);
+          bucketMJCxt.setAliasBucketFileNameMapping(aliasBucketFileNameMapping);
+          bucketMJCxt.setBucketFileNameMapping(
+              currMapJoinOp.getConf().getBigTableBucketNumMapping());
+          localPlan.setInputFileChangeSensitive(true);
+          bucketMJCxt.setMapJoinBigTableAlias(currMapJoinOp.getConf().getBigTableAlias());
+          bucketMJCxt
+              .setBucketMatcherClass(org.apache.hadoop.hive.ql.exec.DefaultBucketMatcher.class);
+          bucketMJCxt.setBigTablePartSpecToFileMapping(
+              currMapJoinOp.getConf().getBigTablePartSpecToFileMapping());
+
+          plan.setUseBucketizedHiveInputFormat(true);
+        }
+      }
+    }
+
+    /**
+     * Initialize the given MapWork.
+     *
+     * @param opProcCtx
+     *          processing context
+     */
+    private static void initSMBJoinPlan(MapWork mapWork,
+        GenSparkProcContext opProcCtx, boolean local)
+        throws SemanticException {
+      TableScanOperator ts = (TableScanOperator) opProcCtx.currentRootOperator;
+      String currAliasId = findAliasId(opProcCtx, ts);
+      GenMapRedUtils.setMapWork(mapWork, opProcCtx.parseContext,
+          opProcCtx.inputs, null, ts, currAliasId, opProcCtx.conf, local);
+    }
+
+    private static String findAliasId(GenSparkProcContext opProcCtx, TableScanOperator ts) {
+      for (String alias : opProcCtx.topOps.keySet()) {
+        if (opProcCtx.topOps.get(alias) == ts) {
+          return alias;
+        }
+      }
+      return null;
+    }
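For reference, the aliasBucketFileNameMapping wired into the BucketMapJoinContext above maps each small-table alias to a per-bucket-file pairing. A hedged illustration of its shape, with a made-up alias and paths (the real mapping is produced by the earlier bucket map join optimization):

    // small-table alias -> (big-table bucket file -> matching small-table bucket files)
    Map<String, Map<String, List<String>>> mapping =
        new LinkedHashMap<String, Map<String, List<String>>>();
    Map<String, List<String>> forAliasB = new LinkedHashMap<String, List<String>>();
    forAliasB.put("/warehouse/big_tbl/000000_0", Arrays.asList("/warehouse/small_tbl/000000_0"));
    forAliasB.put("/warehouse/big_tbl/000001_0", Arrays.asList("/warehouse/small_tbl/000001_0"));
    mapping.put("b", forAliasB);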
+
+    /**
+     * 1. Initializes the MapWork's aliasToWork, pointing to the big table's TS.
+     * 2. Adds the bucketing information to the MapWork.
+     * 3. Adds local work to the MapWork, with the local work's aliasToWork pointing to the small tables' TS.
+     */
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+      SMBMapJoinOperator mapJoin = (SMBMapJoinOperator) nd;
+      GenSparkProcContext ctx = (GenSparkProcContext) procCtx;
+
+      SparkTask currTask = ctx.currentTask;
+
+      // find the branch on which this processor was invoked
+      int pos = getPositionParent(mapJoin, stack);
+      boolean local = pos != mapJoin.getConf().getPosBigTable();
+
+      MapWork mapWork = ctx.smbJoinWorkMap.get(mapJoin);
+      initSMBJoinPlan(mapWork, ctx, local);
+
+      // add the bucketing information and the local work to the MapWork of this join
+      setupBucketMapJoinInfo(mapWork, mapJoin);
+
+      // local aliases need not hand the context over any further
+      return false;
+    }
+  }
+
+  public static NodeProcessor getTableScanMapJoin() {
+    return new SortMergeJoinProcessor();
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkProcContext.java ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkProcContext.java
index ed88c60..3c37b91 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkProcContext.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkProcContext.java
@@ -19,34 +19,16 @@ package org.apache.hadoop.hive.ql.parse.spark;
 
 import java.io.Serializable;
-import java.util.LinkedHashMap;
-import java.util.LinkedHashSet;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
+import java.util.*;
 
 import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.ql.exec.DependencyCollectionTask;
-import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
-import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
-import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
-import org.apache.hadoop.hive.ql.exec.TableScanOperator;
-import org.apache.hadoop.hive.ql.exec.Task;
-import org.apache.hadoop.hive.ql.exec.TaskFactory;
-import org.apache.hadoop.hive.ql.exec.UnionOperator;
+import org.apache.hadoop.hive.ql.exec.*;
 import org.apache.hadoop.hive.ql.exec.spark.SparkTask;
 import org.apache.hadoop.hive.ql.hooks.ReadEntity;
 import org.apache.hadoop.hive.ql.hooks.WriteEntity;
 import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
 import org.apache.hadoop.hive.ql.parse.ParseContext;
-import org.apache.hadoop.hive.ql.plan.BaseWork;
-import org.apache.hadoop.hive.ql.plan.DependencyCollectionWork;
-import org.apache.hadoop.hive.ql.plan.MoveWork;
-import org.apache.hadoop.hive.ql.plan.OperatorDesc;
-import org.apache.hadoop.hive.ql.plan.SparkEdgeProperty;
-import org.apache.hadoop.hive.ql.plan.SparkWork;
+import org.apache.hadoop.hive.ql.plan.*;
 
 /**
  * GenSparkProcContext maintains information about the tasks and operators
@@ -99,6 +81,9 @@
   // map that says which mapjoin belongs to which work item
   public final Map<MapJoinOperator, List<BaseWork>> mapJoinWorkMap;
 
+  // a map to keep track of which MapWork item holds which SMBMapJoinOp
+  public final Map<SMBMapJoinOperator, MapWork> smbJoinWorkMap;
+
   // a map to keep track of which root generated which work
   public final Map<Operator<?>, BaseWork> rootToWorkMap;
 
@@ -129,22 +114,28 @@
   // remember which reducesinks we've already connected
   public final Set<ReduceSinkOperator> connectedReduceSinks;
 
+  // Alias-to-operator map from the semantic analyzer. This is needed because the
+  // semantic analyzer's mapping sometimes differs from an operator's own alias.
+  public final Map<String, Operator<? extends OperatorDesc>> topOps;
+
   @SuppressWarnings("unchecked")
   public GenSparkProcContext(HiveConf conf, ParseContext parseContext,
       List<Task<MoveWork>> moveTask, List<Task<? extends Serializable>> rootTasks,
-      Set<ReadEntity> inputs, Set<WriteEntity> outputs) {
+      Set<ReadEntity> inputs, Set<WriteEntity> outputs, Map<String, Operator<? extends OperatorDesc>> topOps) {
     this.conf = conf;
     this.parseContext = parseContext;
     this.moveTask = moveTask;
     this.rootTasks = rootTasks;
     this.inputs = inputs;
     this.outputs = outputs;
+    this.topOps = topOps;
     this.currentTask = (SparkTask) TaskFactory.get(
         new SparkWork(conf.getVar(HiveConf.ConfVars.HIVEQUERYID)), conf);
     this.rootTasks.add(currentTask);
     this.leafOperatorToFollowingWork = new LinkedHashMap<Operator<?>, BaseWork>();
     this.linkOpWithWorkMap = new LinkedHashMap<Operator<?>, Map<BaseWork, SparkEdgeProperty>>();
     this.linkWorkWithReduceSinkMap = new LinkedHashMap<BaseWork, List<ReduceSinkOperator>>();
+    this.smbJoinWorkMap = new LinkedHashMap<SMBMapJoinOperator, MapWork>();
     this.mapJoinWorkMap = new LinkedHashMap<MapJoinOperator, List<BaseWork>>();
     this.rootToWorkMap = new LinkedHashMap<Operator<?>, BaseWork>();
     this.childToWorkMap = new LinkedHashMap<Operator<?>, List<BaseWork>>();
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java
index 8e28887..1069f6a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java
@@ -86,7 +86,7 @@ public UnionWork createUnionWork(GenSparkProcContext context, Operator<?> operator
     return unionWork;
   }
 
-  public ReduceWork createReduceWork(GenSparkProcContext context, Operator<?> root, SparkWork sparkWork) {
+  public ReduceWork createReduceWork(GenSparkProcContext context, Operator<?> root, SparkWork sparkWork) throws SemanticException {
     Preconditions.checkArgument(!root.getParentOperators().isEmpty(),
         "AssertionError: expected root.getParentOperators() to be non-empty");
 
@@ -120,10 +120,15 @@ public ReduceWork createReduceWork(GenSparkProcContext context, Operator<?> root
     }
 
     if (reduceWork.getReducer() instanceof JoinOperator) {
-      //reduce-side join
+      //reduce-side join, use MR-style shuffle
+      edgeProp.setMRShuffle();
+    }
+    if (getChildOperator(reduceWork.getReducer(), FileSinkOperator.class) != null) {
+      //FileSink to bucketed files assumes hash partitioning, so also use MR-style shuffle.
       edgeProp.setMRShuffle();
     }
+
     sparkWork.connect(
         context.preceedingWork, reduceWork, edgeProp);
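Both branches above set the same flag; collapsed into one hedged sketch (not the patch's literal code), the rule is: fall back to MapReduce-style shuffle whenever the reducer is a shuffle-side join or anything beneath it writes to bucketed files, since both depend on MapReduce's hash-partition-plus-sort contract:

    if (reduceWork.getReducer() instanceof JoinOperator
        || getChildOperator(reduceWork.getReducer(), FileSinkOperator.class) != null) {
      edgeProp.setMRShuffle(); // hash-partitioned and sorted, as MapReduce would shuffle
    }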
@@ -150,7 +155,12 @@ protected void setupReduceSink(GenSparkProcContext context, ReduceWork reduceWork
   }
 
   public MapWork createMapWork(GenSparkProcContext context, Operator<?> root,
-      SparkWork sparkWork, PrunedPartitionList partitions) throws SemanticException {
+                               SparkWork sparkWork, PrunedPartitionList partitions) throws SemanticException {
+    return createMapWork(context, root, sparkWork, partitions, false);
+  }
+
+  public MapWork createMapWork(GenSparkProcContext context, Operator<?> root,
+      SparkWork sparkWork, PrunedPartitionList partitions, boolean deferSetup) throws SemanticException {
     Preconditions.checkArgument(root.getParentOperators().isEmpty(),
         "AssertionError: expected root.getParentOperators() to be empty");
     MapWork mapWork = new MapWork("Map "+ (++sequenceNumber));
@@ -162,7 +172,9 @@ public MapWork createMapWork(GenSparkProcContext context, Operator<?> root,
         root.getClass().getName());
     String alias = ((TableScanOperator)root).getConf().getAlias();
 
-    setupMapWork(mapWork, context, partitions, root, alias);
+    if (!deferSetup) {
+      setupMapWork(mapWork, context, partitions, root, alias);
+    }
 
     // add new item to the Spark work
     sparkWork.add(mapWork);
@@ -313,4 +325,25 @@ public static boolean isSortNecessary(ReduceSinkOperator reduceSinkOperator) {
     }
     return true;
   }
+
+  /**
+   * Finds an operator of the given class at or below the given operator.
+   *
+   * @param op parent operator at which the search starts
+   * @param klazz the operator class to look for
+   * @return the first matching operator found depth-first (possibly op itself), or null if there is none
+   * @throws SemanticException
+   */
+  public static Operator<?> getChildOperator(Operator<?> op, Class<?> klazz) throws SemanticException {
+    if (klazz.isInstance(op)) {
+      return op;
+    }
+    List<Operator<? extends OperatorDesc>> childOperators = op.getChildOperators();
+    for (Operator<? extends OperatorDesc> childOp : childOperators) {
+      Operator<?> result = getChildOperator(childOp, klazz);
+      if (result != null) {
+        return result;
+      }
+    }
+    return null;
+  }
 }
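The helper's behavior on a tiny hand-built chain, as a sketch only (it assumes the no-arg operator constructors Hive had at the time of this patch; real operator trees come from the planner):

    // TS -> FIL, wired by hand.
    TableScanOperator ts = new TableScanOperator();
    FilterOperator fil = new FilterOperator();
    ts.setChildOperators(Arrays.<Operator<? extends OperatorDesc>>asList(fil));
    fil.setChildOperators(new ArrayList<Operator<? extends OperatorDesc>>());
    // Depth-first search finds the filter under the table scan...
    assert GenSparkUtils.getChildOperator(ts, FilterOperator.class) == fil;
    // ...and yields null when nothing below matches.
    assert GenSparkUtils.getChildOperator(ts, UnionOperator.class) == null;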
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkWork.java ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkWork.java
index 4f5feca..cc33e69 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkWork.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkWork.java
@@ -18,21 +18,15 @@ package org.apache.hadoop.hive.ql.parse.spark;
 
-import java.util.ArrayList;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
+import java.util.*;
 import java.util.Map.Entry;
-import java.util.Stack;
 
 import com.google.common.base.Strings;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.exec.*;
-import org.apache.hadoop.hive.ql.lib.Node;
-import org.apache.hadoop.hive.ql.lib.NodeProcessor;
-import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.lib.*;
 import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.*;
@@ -92,6 +86,26 @@ public Object process(Node nd, Stack<Node> stack,
 
     SparkWork sparkWork = context.currentTask.getWork();
 
+    if (GenSparkUtils.getChildOperator(root, DummyStoreOperator.class) != null) {
+      /*
+       * SMB join case:
+       *
+       *   (Big)   (Small)  (Small)
+       *    TS       TS       TS
+       *     \       |       /
+       *      \      DS     DS
+       *       \     |     /
+       *        SMBJoinOP
+       *
+       * Only create a MapWork rooted at the big table's TS. If there is a dummy-store
+       * operator (DS) anywhere on a TS's children path, that TS belongs to a small table;
+       * no separate map task needs to be created for it, because the small tables are
+       * read by the big table's MapWork.
+       */
+      return null;
+    }
+    SMBMapJoinOperator smbOp = (SMBMapJoinOperator) GenSparkUtils.getChildOperator(root, SMBMapJoinOperator.class);
+
     // Right now the work graph is pretty simple. If there is no
     // Preceding work we have a root and will generate a map
     // vertex. If there is a preceding work we will generate
@@ -109,7 +123,18 @@ public Object process(Node nd, Stack<Node> stack,
     } else {
       // create a new vertex
       if (context.preceedingWork == null) {
-        work = utils.createMapWork(context, root, sparkWork, null);
+        if (smbOp != null) {
+          // This is the sort-merge-bucket map join case. This MapWork (for the big table;
+          // see above) is initialized later by the SparkSortMergeJoinFactory processor, so
+          // don't initialize it here; just record it in the context for that later pass.
+          work = utils.createMapWork(context, root, sparkWork, null, true);
+          if (context.smbJoinWorkMap.get(smbOp) != null) {
+            throw new SemanticException("Each SMBMapJoin should be associated with only one MapWork");
+          }
+          context.smbJoinWorkMap.put(smbOp, (MapWork) work);
+        } else {
+          work = utils.createMapWork(context, root, sparkWork, null);
+        }
       } else {
         work = utils.createReduceWork(context, root, sparkWork);
       }
@@ -274,7 +299,11 @@ public Object process(Node nd, Stack<Node> stack,
           edgeProp.setShuffleSort();
         }
         if (rWork.getReducer() instanceof JoinOperator) {
-          //reduce-side join
+          //reduce-side join, use MR-style shuffle
+          edgeProp.setMRShuffle();
+        }
+        if (GenSparkUtils.getChildOperator(rWork.getReducer(), FileSinkOperator.class) != null) {
+          //FileSink to bucketed files assumes hash partitioning, so also use MR-style shuffle.
           edgeProp.setMRShuffle();
         }
         sparkWork.connect(work, rWork, edgeProp);
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java
index 1c663c4..22164d7 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java
@@ -32,26 +32,11 @@
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.Context;
-import org.apache.hadoop.hive.ql.exec.ConditionalTask;
-import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
-import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
-import org.apache.hadoop.hive.ql.exec.TableScanOperator;
-import org.apache.hadoop.hive.ql.exec.Task;
-import org.apache.hadoop.hive.ql.exec.UnionOperator;
+import org.apache.hadoop.hive.ql.exec.*;
 import org.apache.hadoop.hive.ql.exec.spark.SparkTask;
 import org.apache.hadoop.hive.ql.hooks.ReadEntity;
 import org.apache.hadoop.hive.ql.hooks.WriteEntity;
-import org.apache.hadoop.hive.ql.lib.CompositeProcessor;
-import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
-import org.apache.hadoop.hive.ql.lib.Dispatcher;
-import org.apache.hadoop.hive.ql.lib.ForwardWalker;
-import org.apache.hadoop.hive.ql.lib.GraphWalker;
-import org.apache.hadoop.hive.ql.lib.Node;
-import org.apache.hadoop.hive.ql.lib.NodeProcessor;
-import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
-import org.apache.hadoop.hive.ql.lib.Rule;
-import org.apache.hadoop.hive.ql.lib.RuleRegExp;
+import org.apache.hadoop.hive.ql.lib.*;
 import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.optimizer.physical.CrossProductCheck;
 import
org.apache.hadoop.hive.ql.optimizer.physical.NullScanOptimizer; @@ -59,6 +44,7 @@ import org.apache.hadoop.hive.ql.optimizer.physical.StageIDsRearranger; import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer; import org.apache.hadoop.hive.ql.optimizer.spark.SetSparkReducerParallelism; +import org.apache.hadoop.hive.ql.optimizer.spark.SparkSortMergeJoinFactory; import org.apache.hadoop.hive.ql.parse.GlobalLimitCtx; import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; @@ -136,7 +122,7 @@ protected void generateTaskTree(List> rootTasks, Pa GenSparkWork genSparkWork = new GenSparkWork(GenSparkUtils.getUtils()); GenSparkProcContext procCtx = new GenSparkProcContext( - conf, tempParseContext, mvTask, rootTasks, inputs, outputs); + conf, tempParseContext, mvTask, rootTasks, inputs, outputs, pCtx.getTopOps()); // create a walker which walks the tree in a DFS manner while maintaining // the operator stack. The dispatcher generates the plan from the operator tree @@ -178,6 +164,20 @@ public Object process(Node n, Stack s, GraphWalker ogw = new GenSparkWorkWalker(disp, procCtx); ogw.startWalking(topNodes, null); + + + // ------------------- Second Pass ----------------------- + // SMB Join optimizations to add the "localWork" and bucketing data structures to MapWork. + opRules.clear(); + opRules.put(new TypeRule(SMBMapJoinOperator.class), + SparkSortMergeJoinFactory.getTableScanMapJoin()); + + disp = new DefaultRuleDispatcher(null, opRules, procCtx); + topNodes = new ArrayList(); + topNodes.addAll(pCtx.getTopOps().values()); + ogw = new GenSparkWorkWalker(disp, procCtx); + ogw.startWalking(topNodes, null); + // we need to clone some operator plans and remove union operators still for (BaseWork w: procCtx.workWithUnionOperators) { GenSparkUtils.getUtils().removeUnionOperators(conf, procCtx, w); diff --git ql/src/test/results/clientpositive/spark/auto_join0.q.out ql/src/test/results/clientpositive/spark/auto_join0.q.out index 76ff63d..e5bff96 100644 --- ql/src/test/results/clientpositive/spark/auto_join0.q.out +++ ql/src/test/results/clientpositive/spark/auto_join0.q.out @@ -29,8 +29,8 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/auto_join10.q.out ql/src/test/results/clientpositive/spark/auto_join10.q.out index 05a5912..59a31f6 100644 --- ql/src/test/results/clientpositive/spark/auto_join10.q.out +++ ql/src/test/results/clientpositive/spark/auto_join10.q.out @@ -23,7 +23,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/auto_join11.q.out ql/src/test/results/clientpositive/spark/auto_join11.q.out index 998c28b..fa7c76e 100644 --- ql/src/test/results/clientpositive/spark/auto_join11.q.out +++ ql/src/test/results/clientpositive/spark/auto_join11.q.out @@ -23,7 +23,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 
1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/auto_join12.q.out ql/src/test/results/clientpositive/spark/auto_join12.q.out index d2b7993..62508c7 100644 --- ql/src/test/results/clientpositive/spark/auto_join12.q.out +++ ql/src/test/results/clientpositive/spark/auto_join12.q.out @@ -29,7 +29,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/auto_join13.q.out ql/src/test/results/clientpositive/spark/auto_join13.q.out index 78aa01e..031c72a 100644 --- ql/src/test/results/clientpositive/spark/auto_join13.q.out +++ ql/src/test/results/clientpositive/spark/auto_join13.q.out @@ -30,7 +30,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) Reducer 3 <- Map 6 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/auto_join15.q.out ql/src/test/results/clientpositive/spark/auto_join15.q.out index 5916070..47a1d5e 100644 --- ql/src/test/results/clientpositive/spark/auto_join15.q.out +++ ql/src/test/results/clientpositive/spark/auto_join15.q.out @@ -23,8 +23,8 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/auto_join16.q.out ql/src/test/results/clientpositive/spark/auto_join16.q.out index 0b6807d..514a663 100644 --- ql/src/test/results/clientpositive/spark/auto_join16.q.out +++ ql/src/test/results/clientpositive/spark/auto_join16.q.out @@ -23,7 +23,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/auto_join18.q.out ql/src/test/results/clientpositive/spark/auto_join18.q.out index 6083b38..5cf6728 100644 --- ql/src/test/results/clientpositive/spark/auto_join18.q.out +++ ql/src/test/results/clientpositive/spark/auto_join18.q.out @@ -32,9 +32,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1), Reducer 6 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 1) Reducer 6 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: diff --git ql/src/test/results/clientpositive/spark/auto_join18_multi_distinct.q.out ql/src/test/results/clientpositive/spark/auto_join18_multi_distinct.q.out index 01c8f0a..c35631a 100644 --- 
ql/src/test/results/clientpositive/spark/auto_join18_multi_distinct.q.out +++ ql/src/test/results/clientpositive/spark/auto_join18_multi_distinct.q.out @@ -34,9 +34,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1), Reducer 6 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 1) Reducer 6 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: diff --git ql/src/test/results/clientpositive/spark/auto_join20.q.out ql/src/test/results/clientpositive/spark/auto_join20.q.out index a8f2b9a..388dfb5 100644 --- ql/src/test/results/clientpositive/spark/auto_join20.q.out +++ ql/src/test/results/clientpositive/spark/auto_join20.q.out @@ -23,8 +23,8 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1), Map 6 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -174,8 +174,8 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1), Map 6 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/auto_join21.q.out ql/src/test/results/clientpositive/spark/auto_join21.q.out index f9ac35d..98aed79 100644 --- ql/src/test/results/clientpositive/spark/auto_join21.q.out +++ ql/src/test/results/clientpositive/spark/auto_join21.q.out @@ -13,7 +13,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/auto_join22.q.out ql/src/test/results/clientpositive/spark/auto_join22.q.out index 516322c..ba8a352 100644 --- ql/src/test/results/clientpositive/spark/auto_join22.q.out +++ ql/src/test/results/clientpositive/spark/auto_join22.q.out @@ -14,7 +14,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) Reducer 3 <- Map 6 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/auto_join23.q.out ql/src/test/results/clientpositive/spark/auto_join23.q.out index ce5a670..798d043 100644 --- ql/src/test/results/clientpositive/spark/auto_join23.q.out +++ ql/src/test/results/clientpositive/spark/auto_join23.q.out @@ -13,7 +13,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git 
ql/src/test/results/clientpositive/spark/auto_join24.q.out ql/src/test/results/clientpositive/spark/auto_join24.q.out index 15b8888..876d116 100644 --- ql/src/test/results/clientpositive/spark/auto_join24.q.out +++ ql/src/test/results/clientpositive/spark/auto_join24.q.out @@ -34,7 +34,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/auto_join27.q.out ql/src/test/results/clientpositive/spark/auto_join27.q.out index 67f5739..948ea41 100644 --- ql/src/test/results/clientpositive/spark/auto_join27.q.out +++ ql/src/test/results/clientpositive/spark/auto_join27.q.out @@ -30,9 +30,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 1), Union 3 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 5 <- Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) Union 3 <- Map 6 (NONE, 0), Reducer 2 (NONE, 0) #### A masked pattern was here #### Vertices: diff --git ql/src/test/results/clientpositive/spark/auto_join28.q.out ql/src/test/results/clientpositive/spark/auto_join28.q.out index b979661..0ddec0a 100644 --- ql/src/test/results/clientpositive/spark/auto_join28.q.out +++ ql/src/test/results/clientpositive/spark/auto_join28.q.out @@ -13,7 +13,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -111,7 +111,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -212,7 +212,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -313,7 +313,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/auto_join29.q.out ql/src/test/results/clientpositive/spark/auto_join29.q.out index 0951b8d..8822778 100644 --- ql/src/test/results/clientpositive/spark/auto_join29.q.out +++ ql/src/test/results/clientpositive/spark/auto_join29.q.out @@ -13,7 +13,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -619,7 +619,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 
4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1228,7 +1228,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1849,7 +1849,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2467,7 +2467,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2575,7 +2575,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3184,7 +3184,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3293,7 +3293,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3427,7 +3427,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/auto_join30.q.out ql/src/test/results/clientpositive/spark/auto_join30.q.out index 98b3974..3ab381f 100644 --- ql/src/test/results/clientpositive/spark/auto_join30.q.out +++ ql/src/test/results/clientpositive/spark/auto_join30.q.out @@ -22,9 +22,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1), Reducer 6 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 1) Reducer 6 <- Map 5 (GROUP SORT, 1) #### A masked pattern was here #### Vertices: @@ -175,9 +175,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1), Reducer 6 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL 
SORT, 1) Reducer 6 <- Map 5 (GROUP SORT, 1) #### A masked pattern was here #### Vertices: @@ -322,9 +322,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1), Reducer 6 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 1) Reducer 6 <- Map 5 (GROUP SORT, 1) #### A masked pattern was here #### Vertices: @@ -475,9 +475,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1), Reducer 6 (GROUP PARTITION-LEVEL SORT, 1), Reducer 8 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 1) Reducer 6 <- Map 5 (GROUP SORT, 1) Reducer 8 <- Map 7 (GROUP SORT, 1) #### A masked pattern was here #### @@ -671,9 +671,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1), Reducer 6 (GROUP PARTITION-LEVEL SORT, 1), Reducer 8 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 1) Reducer 6 <- Map 5 (GROUP SORT, 1) Reducer 8 <- Map 7 (GROUP SORT, 1) #### A masked pattern was here #### @@ -858,9 +858,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1), Reducer 6 (GROUP PARTITION-LEVEL SORT, 1), Reducer 8 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 1) Reducer 6 <- Map 5 (GROUP SORT, 1) Reducer 8 <- Map 7 (GROUP SORT, 1) #### A masked pattern was here #### @@ -1045,9 +1045,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1), Reducer 6 (GROUP PARTITION-LEVEL SORT, 1), Reducer 8 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 1) Reducer 6 <- Map 5 (GROUP SORT, 1) Reducer 8 <- Map 7 (GROUP SORT, 1) #### A masked pattern was here #### @@ -1232,9 +1232,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1), Reducer 6 (GROUP PARTITION-LEVEL SORT, 1), Reducer 8 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 1) Reducer 6 <- Map 5 (GROUP SORT, 1) Reducer 8 <- Map 7 (GROUP SORT, 1) #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/spark/auto_join31.q.out ql/src/test/results/clientpositive/spark/auto_join31.q.out index df502c8..ae41dad 100644 --- ql/src/test/results/clientpositive/spark/auto_join31.q.out +++ ql/src/test/results/clientpositive/spark/auto_join31.q.out @@ -28,9 +28,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1), Reducer 6 (GROUP PARTITION-LEVEL SORT, 1), Reducer 8 (GROUP PARTITION-LEVEL SORT, 1) - 
Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 1) Reducer 6 <- Map 5 (GROUP SORT, 1) Reducer 8 <- Map 7 (GROUP SORT, 1) #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/spark/auto_join32.q.out ql/src/test/results/clientpositive/spark/auto_join32.q.out index 8d83188..ab9cb30 100644 --- ql/src/test/results/clientpositive/spark/auto_join32.q.out +++ ql/src/test/results/clientpositive/spark/auto_join32.q.out @@ -35,7 +35,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -168,77 +168,51 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: v - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Filter Operator - predicate: name is not null (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: name (type: string) - sort order: + - Map-reduce partition columns: name (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: registration (type: string) - Map 4 - Map Operator Tree: - TableScan alias: s Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator predicate: name is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: name (type: string) - sort order: + - Map-reduce partition columns: name (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {name} + 1 {registration} + keys: + 0 name (type: string) + 1 name (type: string) + outputColumnNames: _col0, _col8 + Select Operator + expressions: _col0 (type: string), _col8 (type: string) + outputColumnNames: _col0, _col8 + Group By Operator + aggregations: count(DISTINCT _col8) + keys: _col0 (type: string), _col8 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) Reducer 2 Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {VALUE._col1} - outputColumnNames: _col0, _col8 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col8 (type: string) - outputColumnNames: _col0, _col8 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(DISTINCT _col8) - keys: _col0 (type: string), _col8 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - 
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reducer 3 - Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -316,77 +290,51 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: v - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Filter Operator - predicate: name is not null (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: name (type: string) - sort order: + - Map-reduce partition columns: name (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: registration (type: string) - Map 4 - Map Operator Tree: - TableScan alias: s Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator predicate: name is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: name (type: string) - sort order: + - Map-reduce partition columns: name (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {name} + 1 {registration} + keys: + 0 name (type: string) + 1 name (type: string) + outputColumnNames: _col0, _col8 + Select Operator + expressions: _col0 (type: string), _col8 (type: string) + outputColumnNames: _col0, _col8 + Group By Operator + aggregations: count(DISTINCT _col8) + keys: _col0 (type: string), _col8 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) Reducer 2 Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {VALUE._col1} - outputColumnNames: _col0, _col8 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col8 (type: string) - outputColumnNames: _col0, _col8 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(DISTINCT _col8) - keys: _col0 (type: string), _col8 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - 
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reducer 3 - Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -488,52 +436,22 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 - Map 4 Reducer 2 Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {VALUE._col1} - outputColumnNames: _col0, _col9 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col9 (type: string) - outputColumnNames: _col0, _col9 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(DISTINCT _col9) - keys: _col0 (type: string), _col9 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reducer 3 - Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/spark/auto_smb_mapjoin_14.q.out ql/src/test/results/clientpositive/spark/auto_smb_mapjoin_14.q.out index e64d4fb..80d82e5 100644 --- ql/src/test/results/clientpositive/spark/auto_smb_mapjoin_14.q.out +++ ql/src/test/results/clientpositive/spark/auto_smb_mapjoin_14.q.out @@ -64,8 +64,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -76,58 +75,34 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - 
                      Map-reduce partition columns: key (type: int)
-                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
-        Map 4 
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: int)
-                      sort order: +
-                      Map-reduce partition columns: key (type: int)
-                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                    Sorted Merge Bucket Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      condition expressions:
+                        0 
+                        1 
+                      keys:
+                        0 key (type: int)
+                        1 key (type: int)
+                      Select Operator
+                        Group By Operator
+                          aggregations: count()
+                          mode: hash
+                          outputColumnNames: _col0
+                          Reduce Output Operator
+                            sort order: 
+                            value expressions: _col0 (type: bigint)
         Reducer 2 
             Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                condition expressions:
-                  0 
-                  1 
-                Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      sort order: 
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: bigint)
-        Reducer 3 
-            Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
                 mode: mergepartial
                 outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                   expressions: _col0 (type: bigint)
                   outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.TextInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -184,96 +159,67 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1)
-        Reducer 3 <- Reducer 2 (GROUP, 1)
-        Reducer 4 <- Reducer 3 (GROUP, 1)
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
+        Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
       Vertices:
         Map 1 
            Map Operator Tree:
                 TableScan
-                  alias: b
-                  Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: int)
-                      sort order: +
-                      Map-reduce partition columns: key (type: int)
-                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
-        Map 5 
-            Map Operator Tree:
-                TableScan
                   alias: a
                   Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: key is not null (type: boolean)
                     Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: int)
-                      sort order: +
-                      Map-reduce partition columns: key (type: int)
-                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                    Sorted Merge Bucket Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      condition expressions:
+                        0 {key}
+                        1 
+                      keys:
+                        0 key (type: int)
+                        1 key (type: int)
+                      outputColumnNames: _col0
+                      Select Operator
+                        expressions: _col0 (type: int)
+                        outputColumnNames: _col0
+                        Group By Operator
+                          aggregations: count()
+                          keys: _col0 (type: int)
+                          mode: hash
+                          outputColumnNames: _col0, _col1
+                          Reduce Output Operator
+                            key expressions: _col0 (type: int)
+                            sort order: +
+                            Map-reduce partition columns: _col0 (type: int)
+                            value expressions: _col1 (type: bigint)
         Reducer 2 
            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                condition expressions:
-                  0 {KEY.reducesinkkey0}
-                  1 
-                outputColumnNames: _col0
-                Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: int)
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    aggregations: count()
-                    keys: _col0 (type: int)
-                    mode: hash
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: int)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: int)
-                      Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col1 (type: bigint)
-        Reducer 3 
-            Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
                 keys: KEY._col0 (type: int)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE
                   Group By Operator
                     aggregations: count()
                     mode: hash
                     outputColumnNames: _col0
-                    Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
                       sort order: 
-                      Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col0 (type: bigint)
-        Reducer 4 
+        Reducer 3 
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: _col0 (type: bigint)
                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
-                    Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
                    table:
                        input format: org.apache.hadoop.mapred.TextInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -356,40 +302,43 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1)
-        Reducer 3 <- Reducer 2 (GROUP, 1)
-        Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 1), Reducer 8 (GROUP PARTITION-LEVEL SORT, 1)
-        Reducer 7 <- Map 6 (GROUP PARTITION-LEVEL SORT, 1), Map 9 (GROUP PARTITION-LEVEL SORT, 1)
-        Reducer 8 <- Reducer 7 (GROUP, 1)
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
+        Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1), Reducer 5 (GROUP PARTITION-LEVEL SORT, 1)
+        Reducer 5 <- Map 4 (GROUP, 1)
#### A masked pattern was here ####
       Vertices:
         Map 1 
            Map Operator Tree:
                 TableScan
-                  alias: b
-                  Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: int)
-                      sort order: +
-                      Map-reduce partition columns: key (type: int)
-                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
-        Map 5 
-            Map Operator Tree:
-                TableScan
                   alias: a
                   Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: key is not null (type: boolean)
                     Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: int)
-                      sort order: +
-                      Map-reduce partition columns: key (type: int)
-                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
-        Map 6 
+                    Sorted Merge Bucket Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      condition expressions:
+                        0 {key}
+                        1 
+                      keys:
+                        0 key (type: int)
+                        1 key (type: int)
+                      outputColumnNames: _col0
+                      Select Operator
+                        expressions: _col0 (type: int)
+                        outputColumnNames: _col0
+                        Group By Operator
+                          aggregations: count()
+                          keys: _col0 (type: int)
+                          mode: hash
+                          outputColumnNames: _col0, _col1
+                          Reduce Output Operator
+                            key expressions: _col0 (type: int)
+                            sort order: +
+                            Map-reduce partition columns: _col0 (type: int)
+                            value expressions: _col1 (type: bigint)
+        Map 4 
            Map Operator Tree:
                TableScan
                  alias: a
@@ -397,69 +346,45 @@ STAGE PLANS:
                  Filter Operator
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: int)
-                      sort order: +
-                      Map-reduce partition columns: key (type: int)
-                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
-        Map 9 
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: int)
-                      sort order: +
-                      Map-reduce partition columns: key (type: int)
-                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                    Sorted Merge Bucket Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      condition expressions:
+                        0 {key}
+                        1 
+                      keys:
+                        0 key (type: int)
+                        1 key (type: int)
+                      outputColumnNames: _col0
+                      Select Operator
+                        expressions: _col0 (type: int)
+                        outputColumnNames: _col0
+                        Group By Operator
+                          aggregations: count()
+                          keys: _col0 (type: int)
+                          mode: hash
+                          outputColumnNames: _col0, _col1
+                          Reduce Output Operator
+                            key expressions: _col0 (type: int)
+                            sort order: +
+                            Map-reduce partition columns: _col0 (type: int)
+                            value expressions: _col1 (type: bigint)
         Reducer 2 
            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                condition expressions:
-                  0 {KEY.reducesinkkey0}
-                  1 
-                outputColumnNames: _col0
-                Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: int)
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    aggregations: count()
-                    keys: _col0 (type: int)
-                    mode: hash
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: int)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: int)
-                      Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col1 (type: bigint)
-        Reducer 3 
-            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
                keys: KEY._col0 (type: int)
                mode: mergepartial
                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: _col0 (type: int), _col1 (type: bigint)
                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE
                  Reduce Output Operator
                    key expressions: _col0 (type: int)
                    sort order: +
                    Map-reduce partition columns: _col0 (type: int)
-                    Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE
                    value expressions: _col1 (type: bigint)
-        Reducer 4 
+        Reducer 3 
            Reduce Operator Tree:
              Join Operator
                condition map:
@@ -468,61 +393,29 @@
                  0 {KEY.reducesinkkey0} {VALUE._col0}
                  1 {VALUE._col0}
                outputColumnNames: _col0, _col1, _col3
-                Statistics: Num rows: 5 Data size: 22 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint)
                  outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 5 Data size: 22 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
-                    Statistics: Num rows: 5 Data size: 22 Basic stats: COMPLETE Column stats: NONE
                    table:
                        input format: org.apache.hadoop.mapred.TextInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 7 
-            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                condition expressions:
-                  0 {KEY.reducesinkkey0}
-                  1 
-                outputColumnNames: _col0
-                Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: int)
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    aggregations: count()
-                    keys: _col0 (type: int)
-                    mode: hash
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: int)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: int)
-                      Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col1 (type: bigint)
-        Reducer 8 
+        Reducer 5 
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
                keys: KEY._col0 (type: int)
                mode: mergepartial
                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: _col0 (type: int), _col1 (type: bigint)
                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE
                  Reduce Output Operator
                    key expressions: _col0 (type: int)
                    sort order: +
                    Map-reduce partition columns: _col0 (type: int)
-                    Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE
                    value expressions: _col1 (type: bigint)
 
   Stage: Stage-0
@@ -601,8 +494,7 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
-        Reducer 3 <- Reducer 2 (GROUP, 1)
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -617,62 +509,34 @@ STAGE PLANS:
                      expressions: key (type: int)
                      outputColumnNames: _col0
                      Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-        Map 4 
-            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: ((key < 6) and key is not null) (type: boolean)
-                    Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                      Sorted Merge Bucket Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        condition expressions:
+                          0 
+                          1 
+                        keys:
+                          0 _col0 (type: int)
+                          1 _col0 (type: int)
+                        Select Operator
+                          Group By Operator
+                            aggregations: count()
+                            mode: hash
+                            outputColumnNames: _col0
+                            Reduce Output Operator
+                              sort order: 
+                              value expressions: _col0 (type: bigint)
         Reducer 2 
            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                condition expressions:
-                  0 
-                  1 
-                Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      sort order: 
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: bigint)
-        Reducer 3 
-            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: _col0 (type: bigint)
                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                    table:
                        input format: org.apache.hadoop.mapred.TextInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -739,26 +603,12 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
-        Reducer 3 <- Reducer 2 (GROUP, 1)
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
       Vertices:
         Map 1 
            Map Operator Tree:
                TableScan
-                  alias: b
-                  Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: int)
-                      sort order: +
-                      Map-reduce partition columns: key (type: int)
-                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
-        Map 4 
-            Map Operator Tree:
-                TableScan
                  alias: a
                  Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
@@ -768,45 +618,34 @@ STAGE PLANS:
                      expressions: key (type: int)
                      outputColumnNames: _col0
                      Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                      Sorted Merge Bucket Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        condition expressions:
+                          0 
+                          1 
+                        keys:
+                          0 _col0 (type: int)
+                          1 key (type: int)
+                        Select Operator
+                          Group By Operator
+                            aggregations: count()
+                            mode: hash
+                            outputColumnNames: _col0
+                            Reduce Output Operator
+                              sort order: 
+                              value expressions: _col0 (type: bigint)
         Reducer 2 
            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                condition expressions:
-                  0 
-                  1 
-                Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      sort order: 
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: bigint)
-        Reducer 3 
-            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: _col0 (type: bigint)
                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                    table:
                        input format: org.apache.hadoop.mapred.TextInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -897,8 +736,7 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
      Edges:
-        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
-        Reducer 3 <- Reducer 2 (GROUP, 1)
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
      Vertices:
        Map 1 
@@ -913,62 +751,34 @@ STAGE PLANS:
                      expressions: key (type: int)
                      outputColumnNames: _col0
                      Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
-        Map 4 
-            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: (((key < 8) and (key < 6)) and key is not null) (type: boolean)
-                    Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                      Sorted Merge Bucket Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        condition expressions:
+                          0 
+                          1 
+                        keys:
+                          0 _col0 (type: int)
+                          1 _col0 (type: int)
+                        Select Operator
+                          Group By Operator
+                            aggregations: count()
+                            mode: hash
+                            outputColumnNames: _col0
+                            Reduce Output Operator
+                              sort order: 
+                              value expressions: _col0 (type: bigint)
        Reducer 2 
            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                condition expressions:
-                  0 
-                  1 
-                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      sort order: 
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: bigint)
-        Reducer 3 
-            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: _col0 (type: bigint)
                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                    table:
                        input format: org.apache.hadoop.mapred.TextInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -1049,8 +859,7 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
      Edges:
-        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
-        Reducer 3 <- Reducer 2 (GROUP, 1)
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
      Vertices:
        Map 1 
@@ -1065,62 +874,34 @@ STAGE PLANS:
                      expressions: key (type: int)
                      outputColumnNames: _col0
                      Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-        Map 4 
-            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: ((key < 8) and key is not null) (type: boolean)
-                    Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                      Sorted Merge Bucket Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        condition expressions:
+                          0 
+                          1 
+                        keys:
+                          0 _col0 (type: int)
+                          1 _col0 (type: int)
+                        Select Operator
+                          Group By Operator
+                            aggregations: count()
+                            mode: hash
+                            outputColumnNames: _col0
+                            Reduce Output Operator
+                              sort order: 
+                              value expressions: _col0 (type: bigint)
        Reducer 2 
            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                condition expressions:
-                  0 
-                  1 
-                Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      sort order: 
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: bigint)
-        Reducer 3 
-            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: _col0 (type: bigint)
                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                    table:
                        input format: org.apache.hadoop.mapred.TextInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -1178,7 +959,7 @@ STAGE PLANS:
    Spark
      Edges:
        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
-        Reducer 3 <- Reducer 2 (GROUP, 1)
+        Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
      Vertices:
        Map 1 
@@ -1301,8 +1082,7 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
      Edges:
-        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
-        Reducer 3 <- Reducer 2 (GROUP, 1)
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
      Vertices:
        Map 1 
@@ -1317,58 +1097,34 @@ STAGE PLANS:
                      expressions: key (type: int)
                      outputColumnNames: _col0
                      Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-        Map 4 
-            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: int)
-                      sort order: +
-                      Map-reduce partition columns: key (type: int)
-                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                      Sorted Merge Bucket Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        condition expressions:
+                          0 
+                          1 
+                        keys:
+                          0 _col0 (type: int)
+                          1 key (type: int)
+                        Select Operator
+                          Group By Operator
+                            aggregations: count()
+                            mode: hash
+                            outputColumnNames: _col0
+                            Reduce Output Operator
+                              sort order: 
+                              value expressions: _col0 (type: bigint)
        Reducer 2 
            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                condition expressions:
-                  0 
-                  1 
-                Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      sort order: 
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: bigint)
-        Reducer 3 
-            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: _col0 (type: bigint)
                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                    table:
                        input format: org.apache.hadoop.mapred.TextInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -1427,8 +1183,7 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
      Edges:
-        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1)
-        Reducer 3 <- Reducer 2 (GROUP, 1)
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
      Vertices:
        Map 1 
@@ -1443,81 +1198,37 @@ STAGE PLANS:
                      expressions: key (type: int)
                      outputColumnNames: _col0
                      Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-        Map 4 
-            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: ((key < 6) and key is not null) (type: boolean)
-                    Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-        Map 5 
-            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: ((key < 6) and key is not null) (type: boolean)
-                    Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                      Sorted Merge Bucket Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                             Inner Join 0 to 2
+                        condition expressions:
+                          0 
+                          1 
+                          2 
+                        keys:
+                          0 _col0 (type: int)
+                          1 _col0 (type: int)
+                          2 _col0 (type: int)
+                        Select Operator
+                          Group By Operator
+                            aggregations: count()
+                            mode: hash
+                            outputColumnNames: _col0
+                            Reduce Output Operator
+                              sort order: 
+                              value expressions: _col0 (type: bigint)
        Reducer 2 
            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                     Inner Join 0 to 2
-                condition expressions:
-                  0 
-                  1 
-                  2 
-                Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      sort order: 
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: bigint)
-        Reducer 3 
-            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: _col0 (type: bigint)
                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                    table:
                        input format: org.apache.hadoop.mapred.TextInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -1592,26 +1303,12 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
      Edges:
-        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
-        Reducer 3 <- Reducer 2 (GROUP, 1)
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
      Vertices:
        Map 1 
            Map Operator Tree:
                TableScan
-                  alias: b
-                  Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: int)
-                      sort order: +
-                      Map-reduce partition columns: key (type: int)
-                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
-        Map 4 
-            Map Operator Tree:
-                TableScan
                  alias: a
                  Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
@@ -1621,45 +1318,34 @@ STAGE PLANS:
                      expressions: key (type: int)
                      outputColumnNames: _col0
                      Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                      Sorted Merge Bucket Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        condition expressions:
+                          0 
+                          1 
+                        keys:
+                          0 _col0 (type: int)
+                          1 key (type: int)
+                        Select Operator
+                          Group By Operator
+                            aggregations: count()
+                            mode: hash
+                            outputColumnNames: _col0
+                            Reduce Output Operator
+                              sort order: 
+                              value expressions: _col0 (type: bigint)
        Reducer 2 
            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                condition expressions:
-                  0 
-                  1 
-                Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      sort order: 
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: bigint)
-        Reducer 3 
-            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: _col0 (type: bigint)
                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                    table:
                        input format: org.apache.hadoop.mapred.TextInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -1747,8 +1433,6 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-2
     Spark
-      Edges:
-        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
      Vertices:
        Map 1 
@@ -1759,64 +1443,39 @@ STAGE PLANS:
                  Filter Operator
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: int)
-                      sort order: +
-                      Map-reduce partition columns: key (type: int)
-                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                      value expressions: value (type: string)
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 0 Data size: 80 Basic stats: PARTIAL Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: int)
-                      sort order: +
-                      Map-reduce partition columns: key (type: int)
-                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                      value expressions: value (type: string)
-        Reducer 2 
-            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                condition expressions:
-                  0 {KEY.reducesinkkey0} {VALUE._col0}
-                  1 {VALUE._col0}
-                outputColumnNames: _col0, _col1, _col6
-                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string)
-                  outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                  Select Operator
-                    expressions: _col0 (type: int), _col1 (type: string)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.dest1
-                  Select Operator
-                    expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
-                    outputColumnNames: _col0, _col1, _col2
-                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.dest2
+                    Sorted Merge Bucket Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      condition expressions:
+                        0 {key} {value}
+                        1 {value}
+                      keys:
+                        0 key (type: int)
+                        1 key (type: int)
+                      outputColumnNames: _col0, _col1, _col6
+                      Select Operator
+                        expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string)
+                        outputColumnNames: _col0, _col1, _col2
+                        Select Operator
+                          expressions: _col0 (type: int), _col1 (type: string)
+                          outputColumnNames: _col0, _col1
+                          File Output Operator
+                            compressed: false
+                            table:
+                                input format: org.apache.hadoop.mapred.TextInputFormat
+                                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                                name: default.dest1
+                        Select Operator
+                          expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
+                          outputColumnNames: _col0, _col1, _col2
+                          File Output Operator
+                            compressed: false
+                            table:
+                                input format: org.apache.hadoop.mapred.TextInputFormat
+                                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                                name: default.dest2
 
   Stage: Stage-3
     Dependency Collection
@@ -1980,8 +1639,7 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
      Edges:
-        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
-        Reducer 3 <- Reducer 2 (GROUP, 1)
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
      Vertices:
        Map 1 
@@ -1992,82 +1650,54 @@ STAGE PLANS:
                  Filter Operator
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: int)
-                      sort order: +
-                      Map-reduce partition columns: key (type: int)
-                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                      value expressions: value (type: string)
-        Map 4 
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: int)
-                      sort order: +
-                      Map-reduce partition columns: key (type: int)
-                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
-        Reducer 2 
-            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                condition expressions:
-                  0 {KEY.reducesinkkey0} {VALUE._col0}
-                  1 
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: int), _col1 (type: string)
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: _col0 (type: int), _col1 (type: string)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.dest1
-                  Select Operator
-                    expressions: _col0 (type: int)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                    Group By Operator
-                      aggregations: count()
-                      keys: _col0 (type: int)
-                      mode: hash
+                    Sorted Merge Bucket Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      condition expressions:
+                        0 {key} {value}
+                        1 
+                      keys:
+                        0 key (type: int)
+                        1 key (type: int)
                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col1 (type: bigint)
-        Reducer 3 
+                      Select Operator
+                        expressions: _col0 (type: int), _col1 (type: string)
+                        outputColumnNames: _col0, _col1
+                        Select Operator
+                          expressions: _col0 (type: int), _col1 (type: string)
+                          outputColumnNames: _col0, _col1
+                          File Output Operator
+                            compressed: false
+                            table:
+                                input format: org.apache.hadoop.mapred.TextInputFormat
+                                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                                name: default.dest1
+                        Select Operator
+                          expressions: _col0 (type: int)
+                          outputColumnNames: _col0
+                          Group By Operator
+                            aggregations: count()
+                            keys: _col0 (type: int)
+                            mode: hash
+                            outputColumnNames: _col0, _col1
+                            Reduce Output Operator
+                              key expressions: _col0 (type: int)
+                              sort order: +
+                              Map-reduce partition columns: _col0 (type: int)
+                              value expressions: _col1 (type: bigint)
+        Reducer 2 
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
                keys: KEY._col0 (type: int)
                mode: mergepartial
                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: _col0 (type: int), UDFToInteger(_col1) (type: int)
                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
-                    Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE
                    table:
                        input format: org.apache.hadoop.mapred.TextInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_1.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_1.q.out
index 9158d65..a1ce329 100644
--- ql/src/test/results/clientpositive/spark/auto_sortmerge_join_1.q.out
+++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_1.q.out
@@ -149,8 +149,7 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
      Edges:
-        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
-        Reducer 3 <- Reducer 2 (GROUP, 1)
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
      Vertices:
        Map 1 
@@ -163,13 +162,26 @@ STAGE PLANS:
                    isSamplingPred: false
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: string)
-                      sort order: +
-                      Map-reduce partition columns: key (type: string)
-                      Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
-                      tag: 1
-                      auto parallelism: true
+                    Sorted Merge Bucket Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      condition expressions:
+                        0 
+                        1 
+                      keys:
+                        0 key (type: string)
+                        1 key (type: string)
+                      Position of Big Table: 1
+                      Select Operator
+                        Group By Operator
+                          aggregations: count()
+                          mode: hash
+                          outputColumnNames: _col0
+                          Reduce Output Operator
+                            sort order: 
+                            tag: -1
+                            value expressions: _col0 (type: bigint)
+                            auto parallelism: false
            Path -> Alias:
#### A masked pattern was here ####
            Path -> Partition:
@@ -272,117 +284,21 @@ STAGE PLANS:
            Truncated Path -> Alias:
              /bucket_big/ds=2008-04-08 [b]
              /bucket_big/ds=2008-04-09 [b]
-        Map 4 
-            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
-                  GatherStats: false
-                  Filter Operator
-                    isSamplingPred: false
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: string)
-                      sort order: +
-                      Map-reduce partition columns: key (type: string)
-                      Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
-                      tag: 0
-                      auto parallelism: true
-            Path -> Alias:
-#### A masked pattern was here ####
-            Path -> Partition:
-#### A masked pattern was here ####
-                Partition
-                  base file name: ds=2008-04-08
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  partition values:
-                    ds 2008-04-08
-                  properties:
-                    COLUMN_STATS_ACCURATE true
-                    bucket_count 2
-                    bucket_field_name key
-                    columns key,value
-                    columns.comments 
-                    columns.types string:string
-#### A masked pattern was here ####
-                    name default.bucket_small
-                    numFiles 2
-                    numRows 0
-                    partition_columns ds
-                    partition_columns.types string
-                    rawDataSize 0
-                    serialization.ddl struct bucket_small { string key, string value}
-                    serialization.format 1
-                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    totalSize 114
-#### A masked pattern was here ####
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    properties:
-                      SORTBUCKETCOLSPREFIX TRUE
-                      bucket_count 2
-                      bucket_field_name key
-                      columns key,value
-                      columns.comments 
-                      columns.types string:string
-#### A masked pattern was here ####
-                      name default.bucket_small
-                      partition_columns ds
-                      partition_columns.types string
-                      serialization.ddl struct bucket_small { string key, string value}
-                      serialization.format 1
-                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    name: default.bucket_small
-                  name: default.bucket_small
-            Truncated Path -> Alias:
-              /bucket_small/ds=2008-04-08 [a]
        Reducer 2 
-            Needs Tagging: true
-            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                condition expressions:
-                  0 
-                  1 
-                Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      sort order: 
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                      tag: -1
-                      value expressions: _col0 (type: bigint)
-                      auto parallelism: false
-        Reducer 3 
            Needs Tagging: false
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: _col0 (type: bigint)
                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
                    GlobalTableId: 0
#### A masked pattern was here ####
                    NumFilesPerFileSink: 1
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
                    table:
                        input format: org.apache.hadoop.mapred.TextInputFormat
@@ -466,83 +382,12 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
      Edges:
-        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
-        Reducer 3 <- Reducer 2 (GROUP, 1)
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
      Vertices:
        Map 1 
            Map Operator Tree:
                TableScan
-                  alias: b
-                  Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
-                  GatherStats: false
-                  Filter Operator
-                    isSamplingPred: false
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: string)
-                      sort order: +
-                      Map-reduce partition columns: key (type: string)
-                      Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
-                      tag: 1
-                      auto parallelism: true
-            Path -> Alias:
-#### A masked pattern was here ####
-            Path -> Partition:
-#### A masked pattern was here ####
-                Partition
-                  base file name: ds=2008-04-08
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  partition values:
-                    ds 2008-04-08
-                  properties:
-                    COLUMN_STATS_ACCURATE true
-                    bucket_count 2
-                    bucket_field_name key
-                    columns key,value
-                    columns.comments 
-                    columns.types string:string
-#### A masked pattern was here ####
-                    name default.bucket_small
-                    numFiles 2
-                    numRows 0
-                    partition_columns ds
-                    partition_columns.types string
-                    rawDataSize 0
-                    serialization.ddl struct bucket_small { string key, string value}
-                    serialization.format 1
-                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    totalSize 114
-#### A masked pattern was here ####
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    properties:
-                      SORTBUCKETCOLSPREFIX TRUE
-                      bucket_count 2
-                      bucket_field_name key
-                      columns key,value
-                      columns.comments 
-                      columns.types string:string
-#### A masked pattern was here ####
-                      name default.bucket_small
-                      partition_columns ds
-                      partition_columns.types string
-                      serialization.ddl struct bucket_small { string key, string value}
-                      serialization.format 1
-                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    name: default.bucket_small
-                  name: default.bucket_small
-            Truncated Path -> Alias:
-              /bucket_small/ds=2008-04-08 [b]
-        Map 4 
-            Map Operator Tree:
-                TableScan
                  alias: a
                  Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
                  GatherStats: false
@@ -550,13 +395,26 @@ STAGE PLANS:
                    isSamplingPred: false
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: string)
-                      sort order: +
-                      Map-reduce partition columns: key (type: string)
-                      Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
-                      tag: 0
-                      auto parallelism: true
+                    Sorted Merge Bucket Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      condition expressions:
+                        0 
+                        1 
+                      keys:
+                        0 key (type: string)
+                        1 key (type: string)
+                      Position of Big Table: 0
+                      Select Operator
+                        Group By Operator
+                          aggregations: count()
+                          mode: hash
+                          outputColumnNames: _col0
+                          Reduce Output Operator
+                            sort order: 
+                            tag: -1
+                            value expressions: _col0 (type: bigint)
+                            auto parallelism: false
            Path -> Alias:
#### A masked pattern was here ####
            Path -> Partition:
@@ -660,46 +518,20 @@ STAGE PLANS:
              /bucket_big/ds=2008-04-08 [a]
              /bucket_big/ds=2008-04-09 [a]
        Reducer 2 
-            Needs Tagging: true
-            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                condition expressions:
-                  0 
-                  1 
-                Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      sort order: 
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                      tag: -1
-                      value expressions: _col0 (type: bigint)
-                      auto parallelism: false
-        Reducer 3 
            Needs Tagging: false
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: _col0 (type: bigint)
                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
                    GlobalTableId: 0
#### A masked pattern was here ####
                    NumFilesPerFileSink: 1
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
                    table:
                        input format: org.apache.hadoop.mapred.TextInputFormat
@@ -783,83 +615,12 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
      Edges:
-        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
-        Reducer 3 <- Reducer 2 (GROUP, 1)
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
      Vertices:
        Map 1 
            Map Operator Tree:
                TableScan
-                  alias: b
-                  Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
-                  GatherStats: false
-                  Filter Operator
-                    isSamplingPred: false
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: string)
-                      sort order: +
-                      Map-reduce partition columns: key (type: string)
-                      Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
-                      tag: 1
-                      auto parallelism: true
-            Path -> Alias:
-#### A masked pattern was here ####
-            Path -> Partition:
-#### A masked pattern was here ####
-                Partition
-                  base file name: ds=2008-04-08
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  partition values:
-                    ds 2008-04-08
-                  properties:
-                    COLUMN_STATS_ACCURATE true
-                    bucket_count 2
-                    bucket_field_name key
-                    columns key,value
-                    columns.comments 
-                    columns.types string:string
-#### A masked pattern was here ####
-                    name default.bucket_small
-                    numFiles 2
-                    numRows 0
-                    partition_columns ds
-                    partition_columns.types string
-                    rawDataSize 0
-                    serialization.ddl struct bucket_small { string key, string value}
-                    serialization.format 1
-                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    totalSize 114
-#### A masked pattern was here ####
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    properties:
-                      SORTBUCKETCOLSPREFIX TRUE
-                      bucket_count 2
-                      bucket_field_name key
-                      columns key,value
-                      columns.comments 
-                      columns.types string:string
-#### A masked pattern was here ####
-                      name default.bucket_small
-                      partition_columns ds
-                      partition_columns.types string
-                      serialization.ddl struct bucket_small { string key, string value}
-                      serialization.format 1
-                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    name: default.bucket_small
-                  name: default.bucket_small
-            Truncated Path -> Alias:
-              /bucket_small/ds=2008-04-08 [b]
-        Map 4 
-            Map Operator Tree:
-                TableScan
                  alias: a
                  Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
                  GatherStats: false
@@ -867,13 +628,26 @@ STAGE PLANS:
                    isSamplingPred: false
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: string)
-                      sort order: +
-                      Map-reduce partition columns: key (type: string)
-                      Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
-                      tag: 0
-                      auto parallelism: true
+                    Sorted Merge Bucket Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      condition expressions:
+                        0 
+                        1 
+                      keys:
+                        0 key (type: string)
+                        1 key (type: string)
+                      Position of Big Table: 0
+                      Select Operator
+                        Group By Operator
+                          aggregations: count()
+                          mode: hash
+                          outputColumnNames: _col0
+                          Reduce Output Operator
+                            sort order: 
+                            tag: -1
+                            value expressions: _col0 (type: bigint)
+                            auto parallelism: false
            Path -> Alias:
#### A masked pattern was here ####
            Path -> Partition:
@@ -977,46 +751,20 @@ STAGE PLANS:
              /bucket_big/ds=2008-04-08 [a]
              /bucket_big/ds=2008-04-09 [a]
        Reducer 2 
-            Needs Tagging: true
-            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                condition expressions:
-                  0 
-                  1 
-                Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      sort order: 
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                      tag: -1
-                      value expressions: _col0 (type: bigint)
-                      auto parallelism: false
-        Reducer 3 
            Needs Tagging: false
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: _col0 (type: bigint)
                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
                    GlobalTableId: 0
#### A masked pattern was here ####
                    NumFilesPerFileSink: 1
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
                    table:
                        input format: org.apache.hadoop.mapred.TextInputFormat
diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_10.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_10.q.out
index f608cc5..07c5435 100644
--- ql/src/test/results/clientpositive/spark/auto_sortmerge_join_10.q.out
+++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_10.q.out
@@ -75,7 +75,7 @@ STAGE PLANS:
    Spark
      Edges:
        Reducer 3 <- Map 6 (GROUP PARTITION-LEVEL SORT, 1), Union 2 (GROUP PARTITION-LEVEL SORT, 1)
-        Reducer 4 <- Reducer 3 (GROUP, 1)
+        Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 1)
        Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0)
#### A masked pattern was here ####
      Vertices:
@@ -228,9 +228,9 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
      Edges:
-        Reducer 2 <- Map 1 (GROUP, 1)
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
        Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1)
-        Reducer 4 <- Reducer 3 (GROUP, 1)
+        Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
      Vertices:
        Map 1 
diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_11.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_11.q.out
index 3c26363..66162a4 100644
--- ql/src/test/results/clientpositive/spark/auto_sortmerge_join_11.q.out
+++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_11.q.out
@@ -146,7 +146,7 @@ STAGE PLANS:
    Spark
      Edges:
        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
-        Reducer 3 <- Reducer 2 (GROUP, 1)
+        Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
      Vertices:
        Map 1 
@@ -468,7 +468,7 @@ STAGE PLANS:
    Spark
      Edges:
        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
-        Reducer 3 <- Reducer 2 (GROUP, 1)
+        Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
      Vertices:
        Map 1 
@@ -789,7 +789,7 @@ STAGE PLANS:
    Spark
      Edges:
        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
-        Reducer 3 <- Reducer 2 (GROUP, 1)
+        Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
      Vertices:
        Map 1 
@@ -1125,7 +1125,7 @@ STAGE PLANS:
    Spark
      Edges:
        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1)
-        Reducer 3 <- Reducer 2 (GROUP, 1)
+        Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
      Vertices:
        Map 1 
diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_12.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_12.q.out
index 65e496f..11d1530 100644
--- ql/src/test/results/clientpositive/spark/auto_sortmerge_join_12.q.out
+++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_12.q.out
@@ -211,7 +211,7 @@ STAGE PLANS:
    Spark
      Edges:
        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 5 (GROUP PARTITION-LEVEL SORT, 1)
-        Reducer 3 <- Reducer 2 (GROUP, 1)
+        Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1)
        Reducer 5 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 6 (GROUP PARTITION-LEVEL SORT, 1), Map 7 (GROUP PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
      Vertices:
diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_13.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_13.q.out
index a5a281b..a10f3a3 100644
--- ql/src/test/results/clientpositive/spark/auto_sortmerge_join_13.q.out
+++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_13.q.out
@@ -85,8 +85,6 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-2
     Spark
-      Edges:
-        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
      Vertices:
        Map 1 
@@ -97,64 +95,39 @@ STAGE PLANS:
                  Filter Operator
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: int)
-                      sort order: +
-                      Map-reduce partition columns: key (type: int)
-                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                      value expressions: value (type: string)
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 0 Data size: 80 Basic stats: PARTIAL Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: int)
-                      sort order: +
-                      Map-reduce partition columns: key (type: int)
-                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                      value expressions: value (type: string)
-        Reducer 2 
-            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                condition expressions:
-                  0 {KEY.reducesinkkey0} {VALUE._col0}
-                  1 {KEY.reducesinkkey0} {VALUE._col0}
-                outputColumnNames: _col0, _col1, _col5, _col6
-                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)
-                  outputColumnNames: _col0, _col1, _col2, _col3
-                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                  Select Operator
-                    expressions: _col0 (type: int), _col2 (type: int)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.dest1
-                  Select Operator
-                    expressions: _col1 (type: string), _col3 (type: string)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.dest2
+                    Sorted Merge Bucket Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      condition expressions:
+                        0 {key} {value}
+                        1 {key} {value}
+                      keys:
+                        0 key (type: int)
+                        1 key (type: int)
+                      outputColumnNames: _col0, _col1, _col5, _col6
+                      Select Operator
+                        expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)
+                        outputColumnNames: _col0, _col1, _col2, _col3
+                        Select Operator
+                          expressions: _col0 (type: int), _col2 (type: int)
+                          outputColumnNames: _col0, _col1
+                          File Output Operator
+                            compressed: false
+                            table:
+                                input format: org.apache.hadoop.mapred.TextInputFormat
+                                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                                name: default.dest1
+                        Select Operator
+                          expressions: _col1 (type: string), _col3 (type: string)
+                          outputColumnNames: _col0, _col1
+                          File Output Operator
+                            compressed: false
+                            table:
+                                input format: org.apache.hadoop.mapred.TextInputFormat
+                                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                                name: default.dest2
 
   Stage: Stage-3
     Dependency Collection
@@ -302,8 +275,6 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-2
     Spark
-      Edges:
-        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
      Vertices:
        Map 1 
@@ -314,64 +285,39 @@ STAGE PLANS:
                  Filter Operator
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: int)
-                      sort order: +
-                      Map-reduce partition columns: key (type: int)
-                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                      value expressions: value (type: string)
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 0 Data size: 80 Basic stats: PARTIAL Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: int)
-                      sort order: +
-                      Map-reduce partition columns: key (type: int)
-                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                      value expressions: value (type: string)
-        Reducer 2 
-            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                condition expressions:
-                  0 {KEY.reducesinkkey0} {VALUE._col0}
-                  1 {KEY.reducesinkkey0} {VALUE._col0}
-                outputColumnNames: _col0, _col1, _col5, _col6
-                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)
-                  outputColumnNames: _col0, _col1, _col2, _col3
-                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                  Select Operator
-                    expressions: _col0 (type: int), _col2 (type: int)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.dest1
-                  Select Operator
-                    expressions: _col1 (type: string), _col3 (type: string)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.dest2
+                    Sorted Merge Bucket Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      condition expressions:
+                        0 {key} {value}
+                        1 {key} {value}
+                      keys:
+                        0 key (type: int)
+                        1 key (type: int)
+                      outputColumnNames: _col0, _col1, _col5, _col6
+                      Select Operator
+                        expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)
+                        outputColumnNames: _col0, _col1, _col2, _col3
+                        Select Operator
+                          expressions: _col0 (type: int), _col2 (type: int)
+                          outputColumnNames: _col0, _col1
+                          File Output Operator
+                            compressed: false
+                            table:
+                                input format: org.apache.hadoop.mapred.TextInputFormat
+                                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                                name: default.dest1
+                        Select Operator
+                          expressions: _col1 (type: string), _col3 (type: string)
+                          outputColumnNames: _col0, _col1
+                          File Output Operator
+                            compressed: false
+                            table:
+                                input format: org.apache.hadoop.mapred.TextInputFormat
+                                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                                name: default.dest2
 
   Stage: Stage-3
     Dependency Collection
@@ -519,8 +465,6 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-2
     Spark
-      Edges:
-        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
      Vertices:
        Map 1 
@@ -531,64 +475,39 @@ STAGE PLANS:
                  Filter Operator
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: int)
-                      sort order: +
-                      Map-reduce partition columns: key (type: int)
-                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                      value expressions: value (type: string)
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 0 Data size: 80 Basic stats: PARTIAL Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: int)
-                      sort order: +
-                      Map-reduce partition columns: key (type: int)
-                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                      value expressions: value (type: string)
-        Reducer 2 
-            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                condition expressions:
-                  0 {KEY.reducesinkkey0} {VALUE._col0}
-                  1 {KEY.reducesinkkey0} {VALUE._col0}
-                outputColumnNames: _col0, _col1, _col5, _col6
-                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)
-                  outputColumnNames: _col0, _col1, _col2, _col3
-                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                  Select Operator
-                    expressions: _col0 (type: int), _col2 (type: int)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.dest1
-                  Select Operator
-                    expressions: _col1 (type: string), _col3 (type: string)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.dest2
+                    Sorted Merge Bucket Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      condition expressions:
+                        0 {key} {value}
+                        1 {key} {value}
+                      keys:
+                        0 key (type: int)
+                        1 key (type: int)
+                      outputColumnNames: _col0, _col1, _col5, _col6
+                      Select Operator
+                        expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)
+                        outputColumnNames: _col0, _col1, _col2, _col3
+                        Select Operator
+                          expressions: _col0 (type: int), _col2 (type: int)
+                          outputColumnNames: _col0, _col1
+                          File Output Operator
+                            compressed: false
+                            table:
+                                input format: org.apache.hadoop.mapred.TextInputFormat
+                                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                                name: default.dest1
+                        Select Operator
+                          expressions: _col1 (type: string), _col3 (type: string)
+                          outputColumnNames: _col0, _col1
+                          File Output Operator
+                            compressed: false
+                            table:
+                                input format: org.apache.hadoop.mapred.TextInputFormat
+                                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                                name: default.dest2
 
   Stage: Stage-3
     Dependency Collection
diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_14.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_14.q.out
index 2fc3bb6..f353806 100644
--- 
ql/src/test/results/clientpositive/spark/auto_sortmerge_join_14.q.out +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_14.q.out @@ -52,64 +52,42 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Map 4 - Map Operator Tree: - TableScan alias: a Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 + 1 + keys: + 0 key (type: int) + 1 key (type: int) + Select Operator + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 49 Data size: 198 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 49 Data size: 198 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 - Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -159,8 +137,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -168,55 +145,34 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 520 Data size: 2080 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 520 Data size: 2080 Basic stats: COMPLETE Column stats: NONE - Map 4 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - 
Map-reduce partition columns: key (type: int) - Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 + 1 + keys: + 0 key (type: int) + 1 key (type: int) + Select Operator + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 572 Data size: 2288 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 572 Data size: 2288 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 - Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_15.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_15.q.out index 74cbd7c..e3306da 100644 --- ql/src/test/results/clientpositive/spark/auto_sortmerge_join_15.q.out +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_15.q.out @@ -50,64 +50,42 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Map 4 - Map Operator Tree: - TableScan alias: a Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 + 1 + keys: + 0 key (type: int) + 1 key (type: int) + Select Operator + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 49 Data size: 198 Basic stats: COMPLETE 
Column stats: NONE - Select Operator - Statistics: Num rows: 49 Data size: 198 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 - Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -133,8 +111,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -142,55 +119,34 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Map 4 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 + 1 + keys: + 0 key (type: int) + 1 key (type: int) + Select Operator + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 49 Data size: 198 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 49 Data size: 198 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 - Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: 
org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_16.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_16.q.out new file mode 100644 index 0000000..9d89959 --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_16.q.out @@ -0,0 +1,258 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE stage_bucket_big +( +key BIGINT, +value STRING +) +PARTITIONED BY (file_tag STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@stage_bucket_big +POSTHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE stage_bucket_big +( +key BIGINT, +value STRING +) +PARTITIONED BY (file_tag STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@stage_bucket_big +PREHOOK: query: CREATE TABLE bucket_big +( +key BIGINT, +value STRING +) +PARTITIONED BY (day STRING, pri bigint) +clustered by (key) sorted by (key) into 12 buckets +stored as RCFile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucket_big +POSTHOOK: query: CREATE TABLE bucket_big +( +key BIGINT, +value STRING +) +PARTITIONED BY (day STRING, pri bigint) +clustered by (key) sorted by (key) into 12 buckets +stored as RCFile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucket_big +PREHOOK: query: CREATE TABLE stage_bucket_small +( +key BIGINT, +value string +) +PARTITIONED BY (file_tag STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@stage_bucket_small +POSTHOOK: query: CREATE TABLE stage_bucket_small +( +key BIGINT, +value string +) +PARTITIONED BY (file_tag STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@stage_bucket_small +PREHOOK: query: CREATE TABLE bucket_small +( +key BIGINT, +value string +) +PARTITIONED BY (pri bigint) +clustered by (key) sorted by (key) into 12 buckets +stored as RCFile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucket_small +POSTHOOK: query: CREATE TABLE bucket_small +( +key BIGINT, +value string +) +PARTITIONED BY (pri bigint) +clustered by (key) sorted by (key) into 12 buckets +stored as RCFile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucket_small +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket1outof4.txt' overwrite into table stage_bucket_small partition (file_tag='1') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@stage_bucket_small +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket1outof4.txt' overwrite into table stage_bucket_small partition (file_tag='1') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@stage_bucket_small +POSTHOOK: Output: default@stage_bucket_small@file_tag=1 +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket1outof4.txt' overwrite into table stage_bucket_small partition (file_tag='2') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@stage_bucket_small +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket1outof4.txt' overwrite into table stage_bucket_small partition (file_tag='2') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: 
default@stage_bucket_small +POSTHOOK: Output: default@stage_bucket_small@file_tag=2 +PREHOOK: query: insert overwrite table bucket_small partition(pri) +select +key, +value, +file_tag as pri +from +stage_bucket_small +where file_tag between 1 and 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@stage_bucket_small +PREHOOK: Input: default@stage_bucket_small@file_tag=1 +PREHOOK: Input: default@stage_bucket_small@file_tag=2 +PREHOOK: Output: default@bucket_small +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table bucket_small partition(pri) +select +key, +value, +file_tag as pri +from +stage_bucket_small +where file_tag between 1 and 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@stage_bucket_small +POSTHOOK: Input: default@stage_bucket_small@file_tag=1 +POSTHOOK: Input: default@stage_bucket_small@file_tag=2 +POSTHOOK: Output: default@bucket_small@pri=1 +POSTHOOK: Output: default@bucket_small@pri=2 +POSTHOOK: Lineage: bucket_small PARTITION(pri=1).key SIMPLE [(stage_bucket_small)stage_bucket_small.FieldSchema(name:key, type:bigint, comment:null), ] +POSTHOOK: Lineage: bucket_small PARTITION(pri=1).value SIMPLE [(stage_bucket_small)stage_bucket_small.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: bucket_small PARTITION(pri=2).key SIMPLE [(stage_bucket_small)stage_bucket_small.FieldSchema(name:key, type:bigint, comment:null), ] +POSTHOOK: Lineage: bucket_small PARTITION(pri=2).value SIMPLE [(stage_bucket_small)stage_bucket_small.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket1outof4.txt' overwrite into table stage_bucket_big partition (file_tag='1') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@stage_bucket_big +POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket1outof4.txt' overwrite into table stage_bucket_big partition (file_tag='1') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@stage_bucket_big +POSTHOOK: Output: default@stage_bucket_big@file_tag=1 +PREHOOK: query: insert overwrite table bucket_big partition(day,pri) +select +key, +value, +'day1' as day, +1 as pri +from +stage_bucket_big +where +file_tag='1' +PREHOOK: type: QUERY +PREHOOK: Input: default@stage_bucket_big +PREHOOK: Input: default@stage_bucket_big@file_tag=1 +PREHOOK: Output: default@bucket_big +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table bucket_big partition(day,pri) +select +key, +value, +'day1' as day, +1 as pri +from +stage_bucket_big +where +file_tag='1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@stage_bucket_big +POSTHOOK: Input: default@stage_bucket_big@file_tag=1 +POSTHOOK: Output: default@bucket_big@day=day1/pri=1 +POSTHOOK: Lineage: bucket_big PARTITION(day=day1,pri=1).key SIMPLE [(stage_bucket_big)stage_bucket_big.FieldSchema(name:key, type:bigint, comment:null), ] +POSTHOOK: Lineage: bucket_big PARTITION(day=day1,pri=1).value SIMPLE [(stage_bucket_big)stage_bucket_big.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select +a.key , +a.value , +b.value , +'day1' as day, +1 as pri +from +( +select +key, +value +from bucket_big where day='day1' +) a 
+left outer join +( +select +key, +value +from bucket_small +where pri between 1 and 2 +) b +on +(a.key = b.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket_big +PREHOOK: Input: default@bucket_big@day=day1/pri=1 +PREHOOK: Input: default@bucket_small +PREHOOK: Input: default@bucket_small@pri=1 +PREHOOK: Input: default@bucket_small@pri=2 +#### A masked pattern was here #### +POSTHOOK: query: select +a.key , +a.value , +b.value , +'day1' as day, +1 as pri +from +( +select +key, +value +from bucket_big where day='day1' +) a +left outer join +( +select +key, +value +from bucket_small +where pri between 1 and 2 +) b +on +(a.key = b.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket_big +POSTHOOK: Input: default@bucket_big@day=day1/pri=1 +POSTHOOK: Input: default@bucket_small +POSTHOOK: Input: default@bucket_small@pri=1 +POSTHOOK: Input: default@bucket_small@pri=2 +#### A masked pattern was here #### +0 val_0 val_0 day1 1 +0 val_0 val_0 day1 1 +0 val_0 val_0 day1 1 +0 val_0 val_0 day1 1 +0 val_0 val_0 day1 1 +0 val_0 val_0 day1 1 +103 val_103 val_103 day1 1 +103 val_103 val_103 day1 1 +103 val_103 val_103 day1 1 +103 val_103 val_103 day1 1 +169 val_169 val_169 day1 1 +169 val_169 val_169 day1 1 +169 val_169 val_169 day1 1 +169 val_169 val_169 day1 1 +169 val_169 val_169 day1 1 +169 val_169 val_169 day1 1 +169 val_169 val_169 day1 1 +169 val_169 val_169 day1 1 +172 val_172 val_172 day1 1 +172 val_172 val_172 day1 1 +172 val_172 val_172 day1 1 +172 val_172 val_172 day1 1 +374 val_374 val_374 day1 1 +374 val_374 val_374 day1 1 diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_2.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_2.q.out index d1bb7a0..184b046 100644 --- ql/src/test/results/clientpositive/spark/auto_sortmerge_join_2.q.out +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_2.q.out @@ -129,83 +129,12 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: ds=2008-04-08 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - properties: - COLUMN_STATS_ACCURATE true - bucket_count 4 - bucket_field_name key - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_small - numFiles 4 - numRows 0 - partition_columns ds - partition_columns.types string - rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - 
totalSize 226 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 - bucket_field_name key - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_small - partition_columns ds - partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small - Truncated Path -> Alias: - /bucket_small/ds=2008-04-08 [b] - Map 4 - Map Operator Tree: - TableScan alias: a Statistics: Num rows: 54 Data size: 5500 Basic stats: COMPLETE Column stats: NONE GatherStats: false @@ -213,13 +142,26 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: true + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Position of Big Table: 0 + Select Operator + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -323,46 +265,20 @@ STAGE PLANS: /bucket_big/ds=2008-04-08 [a] /bucket_big/ds=2008-04-09 [a] Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 29 Data size: 3025 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 29 Data size: 3025 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Reducer 3 Needs Tagging: false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -448,83 +364,12 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP 
PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: ds=2008-04-08 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - properties: - COLUMN_STATS_ACCURATE true - bucket_count 4 - bucket_field_name key - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_small - numFiles 4 - numRows 0 - partition_columns ds - partition_columns.types string - rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 - bucket_field_name key - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_small - partition_columns ds - partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small - Truncated Path -> Alias: - /bucket_small/ds=2008-04-08 [b] - Map 4 - Map Operator Tree: - TableScan alias: a Statistics: Num rows: 54 Data size: 5500 Basic stats: COMPLETE Column stats: NONE GatherStats: false @@ -532,13 +377,26 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: true + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Position of Big Table: 0 + Select Operator + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -642,46 +500,20 @@ STAGE PLANS: /bucket_big/ds=2008-04-08 [a] 
/bucket_big/ds=2008-04-09 [a] Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 29 Data size: 3025 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 29 Data size: 3025 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Reducer 3 Needs Tagging: false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_3.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_3.q.out index d57a1d7..bbc5144 100644 --- ql/src/test/results/clientpositive/spark/auto_sortmerge_join_3.q.out +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_3.q.out @@ -129,8 +129,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -143,13 +142,26 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: true + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Position of Big Table: 1 + Select Operator + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -203,166 +215,21 @@ STAGE PLANS: name: default.bucket_big Truncated Path -> Alias: /bucket_big/ds=2008-04-08 [b] - Map 4 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 1 Data size: 114 Basic stats: 
COMPLETE Column stats: NONE - tag: 0 - auto parallelism: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: ds=2008-04-08 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - properties: - COLUMN_STATS_ACCURATE true - bucket_count 2 - bucket_field_name key - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_small - numFiles 2 - numRows 0 - partition_columns ds - partition_columns.types string - rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 114 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_small - partition_columns ds - partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small -#### A masked pattern was here #### - Partition - base file name: ds=2008-04-09 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-09 - properties: - COLUMN_STATS_ACCURATE true - bucket_count 2 - bucket_field_name key - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_small - numFiles 2 - numRows 0 - partition_columns ds - partition_columns.types string - rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 114 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_small - partition_columns ds - partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small - Truncated Path -> Alias: - /bucket_small/ds=2008-04-08 [a] - /bucket_small/ds=2008-04-09 [a] Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 31 Data size: 3196 
Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Reducer 3 Needs Tagging: false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -446,132 +313,12 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: ds=2008-04-08 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - properties: - COLUMN_STATS_ACCURATE true - bucket_count 2 - bucket_field_name key - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_small - numFiles 2 - numRows 0 - partition_columns ds - partition_columns.types string - rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 114 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_small - partition_columns ds - partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was 
here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small -#### A masked pattern was here #### - Partition - base file name: ds=2008-04-09 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-09 - properties: - COLUMN_STATS_ACCURATE true - bucket_count 2 - bucket_field_name key - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_small - numFiles 2 - numRows 0 - partition_columns ds - partition_columns.types string - rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 114 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_small - partition_columns ds - partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small - Truncated Path -> Alias: - /bucket_small/ds=2008-04-08 [b] - /bucket_small/ds=2008-04-09 [b] - Map 4 - Map Operator Tree: - TableScan alias: a Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false @@ -579,13 +326,26 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: true + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Position of Big Table: 0 + Select Operator + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -640,46 +400,20 @@ STAGE PLANS: Truncated Path -> Alias: /bucket_big/ds=2008-04-08 [a] Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE 
- tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Reducer 3 Needs Tagging: false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -763,132 +497,12 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: ds=2008-04-08 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - properties: - COLUMN_STATS_ACCURATE true - bucket_count 2 - bucket_field_name key - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_small - numFiles 2 - numRows 0 - partition_columns ds - partition_columns.types string - rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 114 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_small - partition_columns ds - partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small -#### A masked pattern was here #### - Partition - base file name: ds=2008-04-09 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-09 - properties: - COLUMN_STATS_ACCURATE true - 
- bucket_count 2
- bucket_field_name key
- columns key,value
- columns.comments
- columns.types string:string
-#### A masked pattern was here ####
- name default.bucket_small
- numFiles 2
- numRows 0
- partition_columns ds
- partition_columns.types string
- rawDataSize 0
- serialization.ddl struct bucket_small { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 114
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- SORTBUCKETCOLSPREFIX TRUE
- bucket_count 2
- bucket_field_name key
- columns key,value
- columns.comments
- columns.types string:string
-#### A masked pattern was here ####
- name default.bucket_small
- partition_columns ds
- partition_columns.types string
- serialization.ddl struct bucket_small { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_small
- name: default.bucket_small
- Truncated Path -> Alias:
- /bucket_small/ds=2008-04-08 [b]
- /bucket_small/ds=2008-04-09 [b]
- Map 4
- Map Operator Tree:
- TableScan
 alias: a
 Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
 GatherStats: false
@@ -896,13 +510,26 @@ STAGE PLANS:
 isSamplingPred: false
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- auto parallelism: true
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ Position of Big Table: 0
+ Select Operator
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions: _col0 (type: bigint)
+ auto parallelism: false
 Path -> Alias:
#### A masked pattern was here ####
 Path -> Partition:
@@ -957,46 +584,20 @@ STAGE PLANS:
 Truncated Path -> Alias:
 /bucket_big/ds=2008-04-08 [a]
 Reducer 2
- Needs Tagging: true
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0
- 1
- Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col0 (type: bigint)
- auto parallelism: false
- Reducer 3
 Needs Tagging: false
 Reduce Operator Tree:
 Group By Operator
 aggregations: count(VALUE._col0)
 mode: mergepartial
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 Select Operator
 expressions: _col0 (type: bigint)
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 File Output Operator
 compressed: false
 GlobalTableId: 0
#### A masked pattern was here ####
 NumFilesPerFileSink: 1
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
 table:
 input format: org.apache.hadoop.mapred.TextInputFormat
diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_4.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_4.q.out
index 8244c50..95d5e16 100644
--- ql/src/test/results/clientpositive/spark/auto_sortmerge_join_4.q.out
+++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_4.q.out
@@ -145,8 +145,7 @@ STAGE PLANS:
 Stage: Stage-1
 Spark
 Edges:
- Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
- Reducer 3 <- Reducer 2 (GROUP, 1)
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
 Vertices:
 Map 1
@@ -159,13 +158,26 @@ STAGE PLANS:
 isSamplingPred: false
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 14 Data size: 1425 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 14 Data size: 1425 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- auto parallelism: true
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ Position of Big Table: 1
+ Select Operator
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions: _col0 (type: bigint)
+ auto parallelism: false
 Path -> Alias:
#### A masked pattern was here ####
 Path -> Partition:
@@ -219,166 +231,21 @@ STAGE PLANS:
 name: default.bucket_big
 Truncated Path -> Alias:
 /bucket_big/ds=2008-04-08 [b]
- Map 4
- Map Operator Tree:
- TableScan
- alias: a
- Statistics: Num rows: 4 Data size: 452 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- auto parallelism: true
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: ds=2008-04-08
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- partition values:
- ds 2008-04-08
- properties:
- COLUMN_STATS_ACCURATE true
- bucket_count 4
- bucket_field_name key
- columns key,value
- columns.comments
- columns.types string:string
-#### A masked pattern was here ####
- name default.bucket_small
- numFiles 4
- numRows 0
- partition_columns ds
- partition_columns.types string
- rawDataSize 0
- serialization.ddl struct bucket_small { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 226
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- SORTBUCKETCOLSPREFIX TRUE
- bucket_count 4
- bucket_field_name key
- columns key,value
- columns.comments
- columns.types string:string
-#### A masked pattern was here ####
- name default.bucket_small
- partition_columns ds
- partition_columns.types string
- serialization.ddl struct bucket_small { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_small
- name: default.bucket_small
-#### A masked pattern was here ####
- Partition
- base file name: ds=2008-04-09
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- partition values:
- ds 2008-04-09
- properties:
- COLUMN_STATS_ACCURATE true
- bucket_count 4
- bucket_field_name key
- columns key,value
- columns.comments
- columns.types string:string
-#### A masked pattern was here ####
- name default.bucket_small
- numFiles 4
- numRows 0
- partition_columns ds
- partition_columns.types string
- rawDataSize 0
- serialization.ddl struct bucket_small { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 226
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- SORTBUCKETCOLSPREFIX TRUE
- bucket_count 4
- bucket_field_name key
- columns key,value
- columns.comments
- columns.types string:string
-#### A masked pattern was here ####
- name default.bucket_small
- partition_columns ds
- partition_columns.types string
- serialization.ddl struct bucket_small { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_small
- name: default.bucket_small
- Truncated Path -> Alias:
- /bucket_small/ds=2008-04-08 [a]
- /bucket_small/ds=2008-04-09 [a]
 Reducer 2
- Needs Tagging: true
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0
- 1
- Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col0 (type: bigint)
- auto parallelism: false
- Reducer 3
 Needs Tagging: false
 Reduce Operator Tree:
 Group By Operator
 aggregations: count(VALUE._col0)
 mode: mergepartial
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 Select Operator
 expressions: _col0 (type: bigint)
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 File Output Operator
 compressed: false
 GlobalTableId: 0
#### A masked pattern was here ####
 NumFilesPerFileSink: 1
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
 table:
 input format: org.apache.hadoop.mapred.TextInputFormat
@@ -462,132 +329,12 @@ STAGE PLANS:
 Stage: Stage-1
 Spark
 Edges:
- Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
- Reducer 3 <- Reducer 2 (GROUP, 1)
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
 Vertices:
 Map 1
 Map Operator Tree:
 TableScan
- alias: b
- Statistics: Num rows: 4 Data size: 452 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- auto parallelism: true
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: ds=2008-04-08
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- partition values:
- ds 2008-04-08
- properties:
- COLUMN_STATS_ACCURATE true
- bucket_count 4
- bucket_field_name key
- columns key,value
- columns.comments
- columns.types string:string
-#### A masked pattern was here ####
- name default.bucket_small
- numFiles 4
- numRows 0
- partition_columns ds
- partition_columns.types string
- rawDataSize 0
- serialization.ddl struct bucket_small { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 226
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- SORTBUCKETCOLSPREFIX TRUE
- bucket_count 4
- bucket_field_name key
- columns key,value
- columns.comments
- columns.types string:string
-#### A masked pattern was here ####
- name default.bucket_small
- partition_columns ds
- partition_columns.types string
- serialization.ddl struct bucket_small { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_small
- name: default.bucket_small
-#### A masked pattern was here ####
- Partition
- base file name: ds=2008-04-09
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- partition values:
- ds 2008-04-09
- properties:
- COLUMN_STATS_ACCURATE true
- bucket_count 4
- bucket_field_name key
- columns key,value
- columns.comments
- columns.types string:string
-#### A masked pattern was here ####
- name default.bucket_small
- numFiles 4
- numRows 0
- partition_columns ds
- partition_columns.types string
- rawDataSize 0
- serialization.ddl struct bucket_small { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 226
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- SORTBUCKETCOLSPREFIX TRUE
- bucket_count 4
- bucket_field_name key
- columns key,value
- columns.comments
- columns.types string:string
-#### A masked pattern was here ####
- name default.bucket_small
- partition_columns ds
- partition_columns.types string
- serialization.ddl struct bucket_small { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_small
- name: default.bucket_small
- Truncated Path -> Alias:
- /bucket_small/ds=2008-04-08 [b]
- /bucket_small/ds=2008-04-09 [b]
- Map 4
- Map Operator Tree:
- TableScan
 alias: a
 Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE
 GatherStats: false
@@ -595,13 +342,26 @@ STAGE PLANS:
 isSamplingPred: false
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 14 Data size: 1425 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 14 Data size: 1425 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- auto parallelism: true
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ Position of Big Table: 0
+ Select Operator
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions: _col0 (type: bigint)
+ auto parallelism: false
 Path -> Alias:
#### A masked pattern was here ####
 Path -> Partition:
@@ -656,46 +416,20 @@ STAGE PLANS:
 Truncated Path -> Alias:
 /bucket_big/ds=2008-04-08 [a]
 Reducer 2
- Needs Tagging: true
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0
- 1
- Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col0 (type: bigint)
- auto parallelism: false
- Reducer 3
 Needs Tagging: false
 Reduce Operator Tree:
 Group By Operator
 aggregations: count(VALUE._col0)
 mode: mergepartial
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 Select Operator
 expressions: _col0 (type: bigint)
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 File Output Operator
 compressed: false
 GlobalTableId: 0
#### A masked pattern was here ####
 NumFilesPerFileSink: 1
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
 table:
 input format: org.apache.hadoop.mapred.TextInputFormat
@@ -779,132 +513,12 @@ STAGE PLANS:
 Stage: Stage-1
 Spark
 Edges:
- Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
- Reducer 3 <- Reducer 2 (GROUP, 1)
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
 Vertices:
 Map 1
 Map Operator Tree:
 TableScan
- alias: b
- Statistics: Num rows: 4 Data size: 452 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- auto parallelism: true
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: ds=2008-04-08
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- partition values:
- ds 2008-04-08
- properties:
- COLUMN_STATS_ACCURATE true
- bucket_count 4
- bucket_field_name key
- columns key,value
- columns.comments
- columns.types string:string
-#### A masked pattern was here ####
- name default.bucket_small
- numFiles 4
- numRows 0
- partition_columns ds
- partition_columns.types string
- rawDataSize 0
- serialization.ddl struct bucket_small { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 226
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- SORTBUCKETCOLSPREFIX TRUE
- bucket_count 4
- bucket_field_name key
- columns key,value
- columns.comments
- columns.types string:string
-#### A masked pattern was here ####
- name default.bucket_small
- partition_columns ds
- partition_columns.types string
- serialization.ddl struct bucket_small { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_small
- name: default.bucket_small
-#### A masked pattern was here ####
- Partition
- base file name: ds=2008-04-09
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- partition values:
- ds 2008-04-09
- properties:
- COLUMN_STATS_ACCURATE true
- bucket_count 4
- bucket_field_name key
- columns key,value
- columns.comments
- columns.types string:string
-#### A masked pattern was here ####
- name default.bucket_small
- numFiles 4
- numRows 0
- partition_columns ds
- partition_columns.types string
- rawDataSize 0
- serialization.ddl struct bucket_small { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 226
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- SORTBUCKETCOLSPREFIX TRUE
- bucket_count 4
- bucket_field_name key
- columns key,value
- columns.comments
- columns.types string:string
-#### A masked pattern was here ####
- name default.bucket_small
- partition_columns ds
- partition_columns.types string
- serialization.ddl struct bucket_small { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_small
- name: default.bucket_small
- Truncated Path -> Alias:
- /bucket_small/ds=2008-04-08 [b]
- /bucket_small/ds=2008-04-09 [b]
- Map 4
- Map Operator Tree:
- TableScan
 alias: a
 Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE
 GatherStats: false
@@ -912,13 +526,26 @@ STAGE PLANS:
 isSamplingPred: false
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 14 Data size: 1425 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 14 Data size: 1425 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- auto parallelism: true
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ Position of Big Table: 0
+ Select Operator
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions: _col0 (type: bigint)
+ auto parallelism: false
 Path -> Alias:
#### A masked pattern was here ####
 Path -> Partition:
@@ -973,46 +600,20 @@ STAGE PLANS:
 Truncated Path -> Alias:
 /bucket_big/ds=2008-04-08 [a]
 Reducer 2
- Needs Tagging: true
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0
- 1
- Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col0 (type: bigint)
- auto parallelism: false
- Reducer 3
 Needs Tagging: false
 Reduce Operator Tree:
 Group By Operator
 aggregations: count(VALUE._col0)
 mode: mergepartial
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 Select Operator
 expressions: _col0 (type: bigint)
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 File Output Operator
 compressed: false
 GlobalTableId: 0
#### A masked pattern was here ####
 NumFilesPerFileSink: 1
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
 table:
 input format: org.apache.hadoop.mapred.TextInputFormat
diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_5.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_5.q.out
index 2ab1bca..42a65af 100644
--- ql/src/test/results/clientpositive/spark/auto_sortmerge_join_5.q.out
+++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_5.q.out
@@ -110,8 +110,7 @@ STAGE PLANS:
 Stage: Stage-1
 Spark
 Edges:
- Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
- Reducer 3 <- Reducer 2 (GROUP, 1)
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
 Vertices:
 Map 1
@@ -124,13 +123,26 @@ STAGE PLANS:
 isSamplingPred: false
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 14 Data size: 1425 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 14 Data size: 1425 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- auto parallelism: true
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ Position of Big Table: 1
+ Select Operator
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions: _col0 (type: bigint)
+ auto parallelism: false
 Path -> Alias:
#### A masked pattern was here ####
 Path -> Partition:
@@ -180,113 +192,21 @@ STAGE PLANS:
 name: default.bucket_big
 Truncated Path -> Alias:
 /bucket_big [b]
- Map 4
- Map Operator Tree:
- TableScan
- alias: a
- Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- auto parallelism: true
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: bucket_small
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- COLUMN_STATS_ACCURATE true
- SORTBUCKETCOLSPREFIX TRUE
- bucket_count 4
- bucket_field_name key
- columns key,value
- columns.comments
- columns.types string:string
-#### A masked pattern was here ####
- name default.bucket_small
- numFiles 4
- serialization.ddl struct bucket_small { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 226
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- COLUMN_STATS_ACCURATE true
- SORTBUCKETCOLSPREFIX TRUE
- bucket_count 4
- bucket_field_name key
- columns key,value
- columns.comments
- columns.types string:string
-#### A masked pattern was here ####
- name default.bucket_small
- numFiles 4
- serialization.ddl struct bucket_small { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 226
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_small
- name: default.bucket_small
- Truncated Path -> Alias:
- /bucket_small [a]
 Reducer 2
- Needs Tagging: true
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0
- 1
- Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col0 (type: bigint)
- auto parallelism: false
- Reducer 3
 Needs Tagging: false
 Reduce Operator Tree:
 Group By Operator
 aggregations: count(VALUE._col0)
 mode: mergepartial
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 Select Operator
 expressions: _col0 (type: bigint)
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 File Output Operator
 compressed: false
 GlobalTableId: 0
#### A masked pattern was here ####
 NumFilesPerFileSink: 1
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
 table:
 input format: org.apache.hadoop.mapred.TextInputFormat
@@ -364,79 +284,12 @@ STAGE PLANS:
 Stage: Stage-1
 Spark
 Edges:
- Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
- Reducer 3 <- Reducer 2 (GROUP, 1)
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
 Vertices:
 Map 1
 Map Operator Tree:
 TableScan
- alias: b
- Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- auto parallelism: true
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: bucket_small
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- COLUMN_STATS_ACCURATE true
- SORTBUCKETCOLSPREFIX TRUE
- bucket_count 4
- bucket_field_name key
- columns key,value
- columns.comments
- columns.types string:string
-#### A masked pattern was here ####
- name default.bucket_small
- numFiles 4
- serialization.ddl struct bucket_small { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 226
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- COLUMN_STATS_ACCURATE true
- SORTBUCKETCOLSPREFIX TRUE
- bucket_count 4
- bucket_field_name key
- columns key,value
- columns.comments
- columns.types string:string
-#### A masked pattern was here ####
- name default.bucket_small
- numFiles 4
- serialization.ddl struct bucket_small { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 226
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_small
- name: default.bucket_small
- Truncated Path -> Alias:
- /bucket_small [b]
- Map 4
- Map Operator Tree:
- TableScan
 alias: a
 Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE
 GatherStats: false
@@ -444,13 +297,26 @@
 isSamplingPred: false
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 14 Data size: 1425 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 14 Data size: 1425 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- auto parallelism: true
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ Position of Big Table: 0
+ Select Operator
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions: _col0 (type: bigint)
+ auto parallelism: false
 Path -> Alias:
#### A masked pattern was here ####
 Path -> Partition:
@@ -501,46 +367,20 @@ STAGE PLANS:
 Truncated Path -> Alias:
 /bucket_big [a]
 Reducer 2
- Needs Tagging: true
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0
- 1
- Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col0 (type: bigint)
- auto parallelism: false
- Reducer 3
 Needs Tagging: false
 Reduce Operator Tree:
 Group By Operator
 aggregations: count(VALUE._col0)
 mode: mergepartial
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 Select Operator
 expressions: _col0 (type: bigint)
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 File Output Operator
 compressed: false
 GlobalTableId: 0
#### A masked pattern was here ####
 NumFilesPerFileSink: 1
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
 table:
 input format: org.apache.hadoop.mapred.TextInputFormat
@@ -618,79 +458,12 @@ STAGE PLANS:
 Stage: Stage-1
 Spark
 Edges:
- Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
- Reducer 3 <- Reducer 2 (GROUP, 1)
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
 Vertices:
 Map 1
 Map Operator Tree:
 TableScan
- alias: b
- Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- auto parallelism: true
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: bucket_small
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- COLUMN_STATS_ACCURATE true
- SORTBUCKETCOLSPREFIX TRUE
- bucket_count 4
- bucket_field_name key
- columns key,value
- columns.comments
- columns.types string:string
-#### A masked pattern was here ####
- name default.bucket_small
- numFiles 4
- serialization.ddl struct bucket_small { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 226
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- COLUMN_STATS_ACCURATE true
- SORTBUCKETCOLSPREFIX TRUE
- bucket_count 4
- bucket_field_name key
- columns key,value
- columns.comments
- columns.types string:string
-#### A masked pattern was here ####
- name default.bucket_small
- numFiles 4
- serialization.ddl struct bucket_small { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 226
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_small
- name: default.bucket_small
- Truncated Path -> Alias:
- /bucket_small [b]
- Map 4
- Map Operator Tree:
- TableScan
 alias: a
 Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE
 GatherStats: false
@@ -698,13 +471,26 @@ STAGE PLANS:
 isSamplingPred: false
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 14 Data size: 1425 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 14 Data size: 1425 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- auto parallelism: true
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ Position of Big Table: 0
+ Select Operator
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions: _col0 (type: bigint)
+ auto parallelism: false
 Path -> Alias:
#### A masked pattern was here ####
 Path -> Partition:
@@ -755,46 +541,20 @@ STAGE PLANS:
 Truncated Path -> Alias:
 /bucket_big [a]
 Reducer 2
- Needs Tagging: true
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0
- 1
- Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col0 (type: bigint)
- auto parallelism: false
- Reducer 3
 Needs Tagging: false
 Reduce Operator Tree:
 Group By Operator
 aggregations: count(VALUE._col0)
 mode: mergepartial
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 Select Operator
 expressions: _col0 (type: bigint)
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 File Output Operator
 compressed: false
 GlobalTableId: 0
#### A masked pattern was here ####
 NumFilesPerFileSink: 1
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
 table:
 input format: org.apache.hadoop.mapred.TextInputFormat
diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_6.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_6.q.out
index bc4a163..226d83d 100644
--- ql/src/test/results/clientpositive/spark/auto_sortmerge_join_6.q.out
+++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_6.q.out
@@ -100,27 +100,13 @@ STAGE PLANS:
 Stage: Stage-1
 Spark
 Edges:
- Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 6 (GROUP PARTITION-LEVEL SORT, 1)
- Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1)
- Reducer 4 <- Reducer 3 (GROUP, 1)
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
+ Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
 Vertices:
 Map 1
 Map Operator Tree:
 TableScan
- alias: b
- Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE
- Map 5
- Map Operator Tree:
- TableScan
 alias: c
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
 Filter Operator
@@ -131,7 +117,7 @@ STAGE PLANS:
 sort order: +
 Map-reduce partition columns: value (type: string)
 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Map 6
+ Map 4
 Map Operator Tree:
 TableScan
 alias: a
@@ -139,61 +125,47 @@ STAGE PLANS:
 Filter Operator
 predicate: (key is not null and value is not null) (type: boolean)
 Statistics: Num rows: 14 Data size: 1479 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 14 Data size: 1479 Basic stats: COMPLETE Column stats: NONE
- value expressions: value (type: string)
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {value}
+ 1
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ outputColumnNames: _col1
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
 Reducer 2
 Reduce Operator Tree:
 Join Operator
 condition map:
 Inner Join 0 to 1
 condition expressions:
- 0 {VALUE._col0}
- 1
- outputColumnNames: _col1
- Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: string)
- sort order: +
- Map-reduce partition columns: _col1 (type: string)
- Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE
- Reducer 3
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
 0
 1
- Statistics: Num rows: 878 Data size: 3517 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- Statistics: Num rows: 878 Data size: 3517 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
 aggregations: count()
 mode: hash
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 Reduce Output Operator
 sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col0 (type: bigint)
- Reducer 4
+ Reducer 3
 Reduce Operator Tree:
 Group By Operator
 aggregations: count(VALUE._col0)
 mode: mergepartial
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 Select Operator
 expressions: _col0 (type: bigint)
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 File Output Operator
 compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 table:
 input format: org.apache.hadoop.mapred.TextInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -236,9 +208,8 @@ STAGE PLANS:
 Stage: Stage-1
 Spark
 Edges:
- Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 5 (GROUP PARTITION-LEVEL SORT, 1)
- Reducer 3 <- Reducer 2 (GROUP, 1)
- Reducer 5 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 6 (GROUP PARTITION-LEVEL SORT, 1)
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
+ Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
 Vertices:
 Map 1
@@ -257,30 +228,25 @@ STAGE PLANS:
 Map 4
 Map Operator Tree:
 TableScan
- alias: b
- Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE
- Map 6
- Map Operator Tree:
- TableScan
 alias: a
 Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
 Filter Operator
 predicate: (key is not null and value is not null) (type: boolean)
 Statistics: Num rows: 14 Data size: 1479 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 14 Data size: 1479 Basic stats: COMPLETE Column stats: NONE
- value expressions: value (type: string)
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {value}
+ 1
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ outputColumnNames: _col1
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
 Reducer 2
 Reduce Operator Tree:
 Join Operator
@@ -289,17 +255,13 @@ STAGE PLANS:
 condition expressions:
 0
 1
- Statistics: Num rows: 878 Data size: 3517 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- Statistics: Num rows: 878 Data size: 3517 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
 aggregations: count()
 mode: hash
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 Reduce Output Operator
 sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col0 (type: bigint)
 Reducer 3
 Reduce Operator Tree:
@@ -307,33 +269,15 @@ STAGE PLANS:
 aggregations: count(VALUE._col0)
 mode: mergepartial
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 Select Operator
 expressions: _col0 (type: bigint)
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 File Output Operator
 compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 table:
 input format: org.apache.hadoop.mapred.TextInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 5
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0 {VALUE._col0}
- 1
- outputColumnNames: _col1
- Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: string)
- sort order: +
- Map-reduce partition columns: _col1 (type: string)
- Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE
 Stage: Stage-0
 Fetch Operator
@@ -372,39 +316,33 @@ STAGE PLANS:
 Stage: Stage-1
 Spark
 Edges:
- Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1)
- Reducer 3 <- Map 6 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1)
- Reducer 4 <- Reducer 3 (GROUP, 1)
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
+ Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
 Vertices:
 Map 1
 Map Operator Tree:
 TableScan
- alias: b
- Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE
- Map 5
- Map Operator Tree:
- TableScan
 alias: a
 Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
 Filter Operator
 predicate: (key is not null and value is not null) (type: boolean)
 Statistics: Num rows: 14 Data size: 1479 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 14 Data size: 1479 Basic stats: COMPLETE Column stats: NONE
- value expressions: value (type: string)
- Map 6
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {value}
+ 1
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ outputColumnNames: _col1
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Map 4
 Map Operator Tree:
 TableScan
 alias: h
@@ -423,49 +361,27 @@ STAGE PLANS:
 condition map:
 Inner Join 0 to 1
 condition expressions:
- 0 {VALUE._col0}
- 1
- outputColumnNames: _col1
- Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: string)
- sort order: +
- Map-reduce partition columns: _col1 (type: string)
- Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE
- Reducer 3
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
 0
 1
- Statistics: Num rows: 878 Data size: 3517 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- Statistics: Num rows: 878 Data size: 3517 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
 aggregations: count()
 mode: hash
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 Reduce Output Operator
 sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col0 (type: bigint)
- Reducer 4
+ Reducer 3
 Reduce Operator Tree:
 Group By Operator
 aggregations: count(VALUE._col0)
 mode: mergepartial
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 Select Operator
 expressions: _col0 (type: bigint)
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 File Output Operator
 compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 table:
 input format: org.apache.hadoop.mapred.TextInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -505,7 +421,7 @@ STAGE PLANS:
 Spark
 Edges:
 Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1)
- Reducer 3 <- Reducer 2 (GROUP, 1)
+ Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
 Vertices:
 Map 1
@@ -621,85 +537,48 @@ STAGE PLANS:
 Stage: Stage-1
 Spark
 Edges:
- Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1)
- Reducer 3 <- Reducer 2 (GROUP, 1)
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
 Vertices:
 Map 1
 Map Operator Tree:
 TableScan
- alias: b
- Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE
- Map 4
- Map Operator Tree:
- TableScan
- alias: c
- Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE
- Map 5
- Map Operator Tree:
- TableScan
 alias: a
 Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
 Filter Operator
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ condition expressions:
+ 0
+ 1
+ 2
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ 2 key (type: int)
+ Select Operator
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: bigint)
 Reducer 2
 Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- Inner Join 0 to 2
- condition expressions:
- 0
- 1
- 2
- Statistics: Num rows: 1599 Data size: 6397 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- Statistics: Num rows: 1599 Data size: 6397 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Reducer 3
- Reduce Operator Tree:
 Group By Operator
 aggregations: count(VALUE._col0)
 mode: mergepartial
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 Select Operator
 expressions: _col0 (type: bigint)
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 File Output Operator
 compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 table:
 input format: org.apache.hadoop.mapred.TextInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -738,27 +617,13 @@ STAGE PLANS:
 Stage: Stage-1
 Spark
 Edges:
- Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 6 (GROUP PARTITION-LEVEL SORT, 1)
- Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1)
- Reducer 4 <- Reducer 3 (GROUP, 1)
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
+ Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
 Vertices:
 Map 1
 Map Operator Tree:
 TableScan
- alias: b
- Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE
- Map 5
- Map Operator Tree:
- TableScan
 alias: c
 Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
 Filter Operator
@@ -769,7 +634,7 @@ STAGE PLANS:
 sort order: +
 Map-reduce partition columns: value (type: string)
 Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE
- Map 6
+ Map 4
 Map Operator Tree:
 TableScan
 alias: a
@@ -777,61 +642,47 @@ STAGE PLANS:
 Filter Operator
 predicate: (key is not null and value is not null) (type: boolean)
 Statistics: Num rows: 14 Data size: 1479 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 14 Data size: 1479 Basic stats: COMPLETE Column stats: NONE
- value expressions: value (type: string)
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {value}
+ 1
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ outputColumnNames: _col1
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
 Reducer 2
 Reduce Operator Tree:
 Join Operator
 condition map:
 Inner Join 0 to 1
 condition expressions:
- 0 {VALUE._col0}
- 1
- outputColumnNames: _col1
- Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: string)
- sort order: +
- Map-reduce partition columns: _col1 (type: string)
- Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE
- Reducer 3
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
 0
 1
- Statistics: Num rows: 878 Data size: 3517 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- Statistics: Num rows: 878 Data size: 3517 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
 aggregations: count()
 mode: hash
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 Reduce Output Operator
 sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col0 (type: bigint)
- Reducer 4
+ Reducer 3
 Reduce Operator Tree:
 Group By Operator
 aggregations: count(VALUE._col0)
 mode: mergepartial
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 Select Operator
 expressions: _col0 (type: bigint)
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 File Output Operator
 compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 table:
 input format: org.apache.hadoop.mapred.TextInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -870,27 +721,13 @@ STAGE PLANS:
 Stage: Stage-1
 Spark
 Edges:
- Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 6 (GROUP PARTITION-LEVEL SORT, 1)
- Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1)
- Reducer 4 <- Reducer 3 (GROUP, 1)
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
+ Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
 Vertices:
 Map 1
 Map Operator Tree:
 TableScan
- alias: b
- Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE
- Map 5
- Map Operator Tree:
- TableScan
 alias: c
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
 Filter Operator
@@ -901,7 +738,7 @@ STAGE PLANS:
 sort order: +
 Map-reduce partition columns: value (type: string)
 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Map 6
+ Map 4
 Map Operator Tree:
 TableScan
 alias: a
@@ -909,61 +746,47 @@ STAGE PLANS:
 Filter Operator
 predicate: (key is not null and value is not null) (type: boolean)
 Statistics: Num rows: 14 Data size: 1479 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 14 Data size: 1479 Basic stats: COMPLETE Column stats: NONE
- value expressions: value (type: string)
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {value}
+ 1
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ outputColumnNames: _col1
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
 Reducer 2
 Reduce Operator Tree:
 Join Operator
 condition map:
 Inner Join 0 to 1
 condition expressions:
- 0 {VALUE._col0}
- 1
- outputColumnNames: _col1
- Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: string)
- sort order: +
- Map-reduce partition columns: _col1 (type: string)
- Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE
- Reducer 3
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
 0
 1
- Statistics: Num rows: 878 Data size: 3517 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- Statistics: Num rows: 878 Data size: 3517 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
 aggregations: count()
 mode: hash
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 Reduce Output Operator
 sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col0 (type: bigint)
- Reducer 4
+ Reducer 3
 Reduce Operator Tree:
 Group By Operator
 aggregations: count(VALUE._col0)
 mode: mergepartial
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 Select Operator
 expressions: _col0 (type: bigint)
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 File Output Operator
 compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 table:
 input format: org.apache.hadoop.mapred.TextInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -1003,7 +826,7 @@ STAGE PLANS:
 Spark
 Edges:
 Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1)
- Reducer 3 <- Reducer 2 (GROUP, 1)
+ Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
 Vertices:
 Map 1
@@ -1119,85 +942,48 @@ STAGE PLANS:
 Stage: Stage-1
 Spark
 Edges:
- Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1)
- Reducer 3 <- Reducer 2 (GROUP, 1)
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
 Vertices:
 Map 1
 Map Operator Tree:
 TableScan
- alias: b
- Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE
- Map 4
- Map Operator Tree:
- TableScan
- alias: c
- Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE
- Map 5
- Map Operator Tree:
- TableScan
 alias: a
 Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
 Filter Operator
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ condition expressions:
+ 0
+ 1
+ 2
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ 2 key (type: int)
+ Select Operator
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: bigint)
 Reducer 2
 Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- Inner Join 0 to 2
- condition expressions:
- 0
- 1
- 2
- Statistics: Num rows: 1599 Data size: 6397 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- Statistics: Num rows: 1599 Data size: 6397 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Reducer 3
- Reduce Operator Tree:
 Group By Operator
 aggregations: count(VALUE._col0)
 mode: mergepartial
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 Select Operator
 expressions: _col0 (type: bigint)
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 File Output Operator
 compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 table:
 input format: org.apache.hadoop.mapred.TextInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -1236,27 +1022,13 @@ STAGE PLANS:
 Stage: Stage-1
 Spark
 Edges:
- Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 6 (GROUP PARTITION-LEVEL SORT, 1)
- Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1)
- Reducer 4 <- Reducer 3 (GROUP, 1)
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
+ Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
 Vertices:
 Map 1
 Map Operator Tree:
 TableScan
- alias: b
- Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE
- Map 5
- Map Operator Tree:
- TableScan
 alias: c
 Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
 Filter Operator
@@ -1267,7 +1039,7 @@ STAGE PLANS:
 sort order: +
 Map-reduce partition columns: value (type: string)
 Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE
- Map 6
+ Map 4
 Map Operator Tree:
 TableScan
 alias: a
@@ -1275,61 +1047,47 @@ STAGE PLANS:
 Filter Operator
 predicate: (key is not null and value is not null) (type: boolean)
 Statistics: Num rows: 14 Data size: 1479 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
-                      key expressions: key (type: int)
-                      sort order: +
-                      Map-reduce partition columns: key (type: int)
-                      Statistics: Num rows: 14 Data size: 1479 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: value (type: string)
+                    Sorted Merge Bucket Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      condition expressions:
+                        0 {value}
+                        1
+                      keys:
+                        0 key (type: int)
+                        1 key (type: int)
+                      outputColumnNames: _col1
+                      Reduce Output Operator
+                        key expressions: _col1 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col1 (type: string)
        Reducer 2
            Reduce Operator Tree:
              Join Operator
                condition map:
                     Inner Join 0 to 1
                condition expressions:
-                  0 {VALUE._col0}
-                  1
-                outputColumnNames: _col1
-                Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col1 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col1 (type: string)
-                  Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE
-        Reducer 3
-            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                condition expressions:
                  0
                  1
-                Statistics: Num rows: 878 Data size: 3517 Basic stats: COMPLETE Column stats: NONE
                Select Operator
-                  Statistics: Num rows: 878 Data size: 3517 Basic stats: COMPLETE Column stats: NONE
                  Group By Operator
                    aggregations: count()
                    mode: hash
                    outputColumnNames: _col0
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                    Reduce Output Operator
                      sort order:
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                      value expressions: _col0 (type: bigint)
-        Reducer 4
+        Reducer 3
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: _col0 (type: bigint)
                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                    table:
                        input format: org.apache.hadoop.mapred.TextInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_7.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_7.q.out
index 16ef3ae..1b8969a 100644
--- ql/src/test/results/clientpositive/spark/auto_sortmerge_join_7.q.out
+++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_7.q.out
@@ -162,8 +162,7 @@ STAGE PLANS:
  Stage: Stage-1
    Spark
      Edges:
-        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
-        Reducer 3 <- Reducer 2 (GROUP, 1)
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
      Vertices:
        Map 1
@@ -176,13 +175,26 @@ STAGE PLANS:
                    isSamplingPred: false
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: string)
-                      sort order: +
-                      Map-reduce partition columns: key (type: string)
-                      Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE
-                      tag: 1
-                      auto parallelism: true
+                    Sorted Merge Bucket Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      condition expressions:
+                        0
+                        1
+                      keys:
+                        0 key (type: string)
+                        1 key (type: string)
+                      Position of Big Table: 1
+                      Select Operator
+                        Group By Operator
+                          aggregations: count()
+                          mode: hash
+                          outputColumnNames: _col0
+                          Reduce Output Operator
+                            sort order:
+                            tag: -1
+                            value expressions: _col0 (type: bigint)
+                            auto parallelism: false
            Path -> Alias:
#### A masked pattern was here ####
            Path -> Partition:
@@ -285,166 +297,21 @@ STAGE PLANS:
            Truncated Path -> Alias:
              /bucket_big/ds=2008-04-08 [b]
              /bucket_big/ds=2008-04-09 [b]
-        Map 4
-            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 4 Data size: 452 Basic stats: COMPLETE Column stats: NONE
-                  GatherStats: false
-                  Filter Operator
-                    isSamplingPred: false
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: string)
-                      sort order: +
-                      Map-reduce partition columns: key (type: string)
-                      Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE
-                      tag: 0
-                      auto parallelism: true
-            Path -> Alias:
-#### A masked pattern was here ####
-            Path -> Partition:
-#### A masked pattern was here ####
-                Partition
-                  base file name: ds=2008-04-08
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  partition values:
-                    ds 2008-04-08
-                  properties:
-                    COLUMN_STATS_ACCURATE true
-                    bucket_count 4
-                    bucket_field_name key
-                    columns key,value
-                    columns.comments
-                    columns.types string:string
-#### A masked pattern was here ####
-                    name default.bucket_small
-                    numFiles 4
-                    numRows 0
-                    partition_columns ds
-                    partition_columns.types string
-                    rawDataSize 0
-                    serialization.ddl struct bucket_small { string key, string value}
-                    serialization.format 1
-                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    totalSize 226
-#### A masked pattern was here ####
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    properties:
-                      SORTBUCKETCOLSPREFIX TRUE
-                      bucket_count 4
-                      bucket_field_name key
-                      columns key,value
-                      columns.comments
-                      columns.types string:string
-#### A masked pattern was here ####
-                      name default.bucket_small
-                      partition_columns ds
-                      partition_columns.types string
-                      serialization.ddl struct bucket_small { string key, string value}
-                      serialization.format 1
-                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    name: default.bucket_small
-                  name: default.bucket_small
-#### A masked pattern was here ####
-                Partition
-                  base file name: ds=2008-04-09
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  partition values:
-                    ds 2008-04-09
-                  properties:
-                    COLUMN_STATS_ACCURATE true
-                    bucket_count 4
-                    bucket_field_name key
-                    columns key,value
-                    columns.comments
-                    columns.types string:string
-#### A masked pattern was here ####
-                    name default.bucket_small
-                    numFiles 4
-                    numRows 0
-                    partition_columns ds
-                    partition_columns.types string
-                    rawDataSize 0
-                    serialization.ddl struct bucket_small { string key, string value}
-                    serialization.format 1
-                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    totalSize 226
-#### A masked pattern was here ####
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    properties:
-                      SORTBUCKETCOLSPREFIX TRUE
-                      bucket_count 4
-                      bucket_field_name key
-                      columns key,value
-                      columns.comments
-                      columns.types string:string
-#### A masked pattern was here ####
-                      name default.bucket_small
-                      partition_columns ds
-                      partition_columns.types string
-                      serialization.ddl struct bucket_small { string key, string value}
-                      serialization.format 1
-                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    name: default.bucket_small
-                  name: default.bucket_small
-            Truncated Path -> Alias:
-              /bucket_small/ds=2008-04-08 [a]
-              /bucket_small/ds=2008-04-09 [a]
        Reducer 2
-            Needs Tagging: true
-            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                condition expressions:
-                  0
-                  1
-                Statistics: Num rows: 29 Data size: 3025 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  Statistics: Num rows: 29 Data size: 3025 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      sort order:
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                      tag: -1
-                      value expressions: _col0 (type: bigint)
-                      auto parallelism: false
-        Reducer 3
            Needs Tagging: false
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: _col0 (type: bigint)
                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
                    GlobalTableId: 0
#### A masked pattern was here ####
                    NumFilesPerFileSink: 1
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
                    table:
                        input format: org.apache.hadoop.mapred.TextInputFormat
@@ -530,132 +397,12 @@ STAGE PLANS:
  Stage: Stage-1
    Spark
      Edges:
-        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
-        Reducer 3 <- Reducer 2 (GROUP, 1)
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
      Vertices:
        Map 1
            Map Operator Tree:
                TableScan
-                  alias: b
-                  Statistics: Num rows: 4 Data size: 452 Basic stats: COMPLETE Column stats: NONE
-                  GatherStats: false
-                  Filter Operator
-                    isSamplingPred: false
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: string)
-                      sort order: +
-                      Map-reduce partition columns: key (type: string)
-                      Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE
-                      tag: 1
-                      auto parallelism: true
-            Path -> Alias:
-#### A masked pattern was here ####
-            Path -> Partition:
-#### A masked pattern was here ####
-                Partition
-                  base file name: ds=2008-04-08
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  partition values:
-                    ds 2008-04-08
-                  properties:
-                    COLUMN_STATS_ACCURATE true
-                    bucket_count 4
-                    bucket_field_name key
-                    columns key,value
-                    columns.comments
-                    columns.types string:string
-#### A masked pattern was here ####
-                    name default.bucket_small
-                    numFiles 4
-                    numRows 0
-                    partition_columns ds
-                    partition_columns.types string
-                    rawDataSize 0
-                    serialization.ddl struct bucket_small { string key, string value}
-                    serialization.format 1
-                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    totalSize 226
-#### A masked pattern was here ####
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    properties:
-                      SORTBUCKETCOLSPREFIX TRUE
-                      bucket_count 4
-                      bucket_field_name key
-                      columns key,value
-                      columns.comments
-                      columns.types string:string
-#### A masked pattern was here ####
-                      name default.bucket_small
-                      partition_columns ds
-                      partition_columns.types string
-                      serialization.ddl struct bucket_small { string key, string value}
-                      serialization.format 1
-                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    name: default.bucket_small
-                  name: default.bucket_small
-#### A masked pattern was here ####
-                Partition
-                  base file name: ds=2008-04-09
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  partition values:
-                    ds 2008-04-09
-                  properties:
-                    COLUMN_STATS_ACCURATE true
-                    bucket_count 4
-                    bucket_field_name key
-                    columns key,value
-                    columns.comments
-                    columns.types string:string
-#### A masked pattern was here ####
-                    name default.bucket_small
-                    numFiles 4
-                    numRows 0
-                    partition_columns ds
-                    partition_columns.types string
-                    rawDataSize 0
-                    serialization.ddl struct bucket_small { string key, string value}
-                    serialization.format 1
-                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    totalSize 226
-#### A masked pattern was here ####
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    properties:
-                      SORTBUCKETCOLSPREFIX TRUE
-                      bucket_count 4
-                      bucket_field_name key
-                      columns key,value
-                      columns.comments
-                      columns.types string:string
-#### A masked pattern was here ####
-                      name default.bucket_small
-                      partition_columns ds
-                      partition_columns.types string
-                      serialization.ddl struct bucket_small { string key, string value}
-                      serialization.format 1
-                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    name: default.bucket_small
-                  name: default.bucket_small
-            Truncated Path -> Alias:
-              /bucket_small/ds=2008-04-08 [b]
-              /bucket_small/ds=2008-04-09 [b]
-        Map 4
-            Map Operator Tree:
-                TableScan
                  alias: a
                  Statistics: Num rows: 54 Data size: 5500 Basic stats: COMPLETE Column stats: NONE
                  GatherStats: false
@@ -1031,13 +645,26 @@ STAGE PLANS:
                    isSamplingPred: false
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: string)
-                      sort order: +
-                      Map-reduce partition columns: key (type: string)
-                      Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE
-                      tag: 0
-                      auto parallelism: true
+                    Sorted Merge Bucket Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      condition expressions:
+                        0
+                        1
+                      keys:
+                        0 key (type: string)
+                        1 key (type: string)
+                      Position of Big Table: 0
+                      Select Operator
+                        Group By Operator
+                          aggregations: count()
+                          mode: hash
+                          outputColumnNames: _col0
+                          Reduce Output Operator
+                            sort order:
+                            tag: -1
+                            value expressions: _col0 (type: bigint)
+                            auto parallelism: false
            Path -> Alias:
#### A masked pattern was here ####
            Path -> Partition:
@@ -1141,46 +768,20 @@ STAGE PLANS:
              /bucket_big/ds=2008-04-08 [a]
              /bucket_big/ds=2008-04-09 [a]
        Reducer 2
-            Needs Tagging: true
-            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                condition expressions:
-                  0
-                  1
-                Statistics: Num rows: 29 Data size: 3025 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  Statistics: Num rows: 29 Data size: 3025 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      sort order:
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                      tag: -1
-                      value expressions: _col0 (type: bigint)
-                      auto parallelism: false
-        Reducer 3
            Needs Tagging: false
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: _col0 (type: bigint)
                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
                    GlobalTableId: 0
#### A masked pattern was here ####
                    NumFilesPerFileSink: 1
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
                    table:
                        input format: org.apache.hadoop.mapred.TextInputFormat
diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_8.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_8.q.out
index 9fd3e5a..6f75068 100644
--- ql/src/test/results/clientpositive/spark/auto_sortmerge_join_8.q.out
+++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_8.q.out
@@ -162,8 +162,7 @@ STAGE PLANS:
  Stage: Stage-1
    Spark
      Edges:
-        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
-        Reducer 3 <- Reducer 2 (GROUP, 1)
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
      Vertices:
        Map 1
@@ -176,13 +175,26 @@ STAGE PLANS:
                    isSamplingPred: false
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: string)
-                      sort order: +
-                      Map-reduce partition columns: key (type: string)
-                      Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
-                      tag: 1
-                      auto parallelism: true
+                    Sorted Merge Bucket Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      condition expressions:
+                        0
+                        1
+                      keys:
+                        0 key (type: string)
+                        1 key (type: string)
+                      Position of Big Table: 1
+                      Select Operator
+                        Group By Operator
+                          aggregations: count()
+                          mode: hash
+                          outputColumnNames: _col0
+                          Reduce Output Operator
+                            sort order:
+                            tag: -1
+                            value expressions: _col0 (type: bigint)
+                            auto parallelism: false
            Path -> Alias:
#### A masked pattern was here ####
            Path -> Partition:
@@ -285,166 +297,21 @@ STAGE PLANS:
            Truncated Path -> Alias:
              /bucket_big/ds=2008-04-08 [b]
              /bucket_big/ds=2008-04-09 [b]
-        Map 4
-            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE
-                  GatherStats: false
-                  Filter Operator
-                    isSamplingPred: false
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: string)
-                      sort order: +
-                      Map-reduce partition columns: key (type: string)
-                      Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
-                      tag: 0
-                      auto parallelism: true
-            Path -> Alias:
-#### A masked pattern was here ####
-            Path -> Partition:
-#### A masked pattern was here ####
-                Partition
-                  base file name: ds=2008-04-08
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  partition values:
-                    ds 2008-04-08
-                  properties:
-                    COLUMN_STATS_ACCURATE true
-                    bucket_count 2
-                    bucket_field_name key
-                    columns key,value
-                    columns.comments
-                    columns.types string:string
-#### A masked pattern was here ####
-                    name default.bucket_small
-                    numFiles 2
-                    numRows 0
-                    partition_columns ds
-                    partition_columns.types string
-                    rawDataSize 0
-                    serialization.ddl struct bucket_small { string key, string value}
-                    serialization.format 1
-                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    totalSize 114
-#### A masked pattern was here ####
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    properties:
-                      SORTBUCKETCOLSPREFIX TRUE
-                      bucket_count 2
-                      bucket_field_name key
-                      columns key,value
-                      columns.comments
-                      columns.types string:string
-#### A masked pattern was here ####
-                      name default.bucket_small
-                      partition_columns ds
-                      partition_columns.types string
-                      serialization.ddl struct bucket_small { string key, string value}
-                      serialization.format 1
-                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    name: default.bucket_small
-                  name: default.bucket_small
-#### A masked pattern was here ####
-                Partition
-                  base file name: ds=2008-04-09
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  partition values:
-                    ds 2008-04-09
-                  properties:
-                    COLUMN_STATS_ACCURATE true
-                    bucket_count 2
-                    bucket_field_name key
-                    columns key,value
-                    columns.comments
-                    columns.types string:string
-#### A masked pattern was here ####
-                    name default.bucket_small
-                    numFiles 2
-                    numRows 0
-                    partition_columns ds
-                    partition_columns.types string
-                    rawDataSize 0
-                    serialization.ddl struct bucket_small { string key, string value}
-                    serialization.format 1
-                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    totalSize 114
-#### A masked pattern was here ####
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    properties:
-                      SORTBUCKETCOLSPREFIX TRUE
-                      bucket_count 2
-                      bucket_field_name key
-                      columns key,value
-                      columns.comments
-                      columns.types string:string
-#### A masked pattern was here ####
-                      name default.bucket_small
-                      partition_columns ds
-                      partition_columns.types string
-                      serialization.ddl struct bucket_small { string key, string value}
-                      serialization.format 1
-                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    name: default.bucket_small
-                  name: default.bucket_small
-            Truncated Path -> Alias:
-              /bucket_small/ds=2008-04-08 [a]
-              /bucket_small/ds=2008-04-09 [a]
        Reducer 2
-            Needs Tagging: true
-            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                condition expressions:
-                  0
-                  1
-                Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      sort order:
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                      tag: -1
-                      value expressions: _col0 (type: bigint)
-                      auto parallelism: false
-        Reducer 3
            Needs Tagging: false
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: _col0 (type: bigint)
                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
                    GlobalTableId: 0
#### A masked pattern was here ####
                    NumFilesPerFileSink: 1
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
                    table:
                        input format: org.apache.hadoop.mapred.TextInputFormat
@@ -530,132 +397,12 @@ STAGE PLANS:
  Stage: Stage-1
    Spark
      Edges:
-        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
-        Reducer 3 <- Reducer 2 (GROUP, 1)
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
      Vertices:
        Map 1
            Map Operator Tree:
                TableScan
-                  alias: b
-                  Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE
-                  GatherStats: false
-                  Filter Operator
-                    isSamplingPred: false
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: string)
-                      sort order: +
-                      Map-reduce partition columns: key (type: string)
-                      Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
-                      tag: 1
-                      auto parallelism: true
-            Path -> Alias:
-#### A masked pattern was here ####
-            Path -> Partition:
-#### A masked pattern was here ####
-                Partition
-                  base file name: ds=2008-04-08
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  partition values:
-                    ds 2008-04-08
-                  properties:
-                    COLUMN_STATS_ACCURATE true
-                    bucket_count 2
-                    bucket_field_name key
-                    columns key,value
-                    columns.comments
-                    columns.types string:string
-#### A masked pattern was here ####
-                    name default.bucket_small
-                    numFiles 2
-                    numRows 0
-                    partition_columns ds
-                    partition_columns.types string
-                    rawDataSize 0
-                    serialization.ddl struct bucket_small { string key, string value}
-                    serialization.format 1
-                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    totalSize 114
-#### A masked pattern was here ####
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    properties:
-                      SORTBUCKETCOLSPREFIX TRUE
-                      bucket_count 2
-                      bucket_field_name key
-                      columns key,value
-                      columns.comments
-                      columns.types string:string
-#### A masked pattern was here ####
-                      name default.bucket_small
-                      partition_columns ds
-                      partition_columns.types string
-                      serialization.ddl struct bucket_small { string key, string value}
-                      serialization.format 1
-                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    name: default.bucket_small
-                  name: default.bucket_small
-#### A masked pattern was here ####
-                Partition
-                  base file name: ds=2008-04-09
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  partition values:
-                    ds 2008-04-09
-                  properties:
-                    COLUMN_STATS_ACCURATE true
-                    bucket_count 2
-                    bucket_field_name key
-                    columns key,value
-                    columns.comments
-                    columns.types string:string
-#### A masked pattern was here ####
-                    name default.bucket_small
-                    numFiles 2
-                    numRows 0
-                    partition_columns ds
-                    partition_columns.types string
-                    rawDataSize 0
-                    serialization.ddl struct bucket_small { string key, string value}
-                    serialization.format 1
-                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    totalSize 114
-#### A masked pattern was here ####
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    properties:
-                      SORTBUCKETCOLSPREFIX TRUE
-                      bucket_count 2
-                      bucket_field_name key
-                      columns key,value
-                      columns.comments
-                      columns.types string:string
-#### A masked pattern was here ####
-                      name default.bucket_small
-                      partition_columns ds
-                      partition_columns.types string
-                      serialization.ddl struct bucket_small { string key, string value}
-                      serialization.format 1
-                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    name: default.bucket_small
-                  name: default.bucket_small
-            Truncated Path -> Alias:
-              /bucket_small/ds=2008-04-08 [b]
-              /bucket_small/ds=2008-04-09 [b]
-        Map 4
-            Map Operator Tree:
-                TableScan
                  alias: a
                  Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
                  GatherStats: false
@@ -663,13 +410,26 @@ STAGE PLANS:
                    isSamplingPred: false
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: string)
-                      sort order: +
-                      Map-reduce partition columns: key (type: string)
-                      Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
-                      tag: 0
-                      auto parallelism: true
+                    Sorted Merge Bucket Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      condition expressions:
+                        0
+                        1
+                      keys:
+                        0 key (type: string)
+                        1 key (type: string)
+                      Position of Big Table: 0
+                      Select Operator
+                        Group By Operator
+                          aggregations: count()
+                          mode: hash
+                          outputColumnNames: _col0
+                          Reduce Output Operator
+                            sort order:
+                            tag: -1
+                            value expressions: _col0 (type: bigint)
+                            auto parallelism: false
            Path -> Alias:
#### A masked pattern was here ####
            Path -> Partition:
@@ -773,46 +533,20 @@ STAGE PLANS:
              /bucket_big/ds=2008-04-08 [a]
              /bucket_big/ds=2008-04-09 [a]
        Reducer 2
-            Needs Tagging: true
-            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                condition expressions:
-                  0
-                  1
-                Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      sort order:
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                      tag: -1
-                      value expressions: _col0 (type: bigint)
-                      auto parallelism: false
-        Reducer 3
            Needs Tagging: false
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: _col0 (type: bigint)
                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
                    GlobalTableId: 0
#### A masked pattern was here ####
                    NumFilesPerFileSink: 1
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
                    table:
                        input format: org.apache.hadoop.mapred.TextInputFormat
@@ -900,132 +634,12 @@ STAGE PLANS:
  Stage: Stage-1
    Spark
      Edges:
-        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
-        Reducer 3 <- Reducer 2 (GROUP, 1)
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
      Vertices:
        Map 1
            Map Operator Tree:
                TableScan
-                  alias: b
-                  Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE
-                  GatherStats: false
-                  Filter Operator
-                    isSamplingPred: false
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: string)
-                      sort order: +
-                      Map-reduce partition columns: key (type: string)
-                      Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
-                      tag: 1
-                      auto parallelism: true
-            Path -> Alias:
-#### A masked pattern was here ####
-            Path -> Partition:
-#### A masked pattern was here ####
-                Partition
-                  base file name: ds=2008-04-08
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  partition values:
-                    ds 2008-04-08
-                  properties:
-                    COLUMN_STATS_ACCURATE true
-                    bucket_count 2
-                    bucket_field_name key
-                    columns key,value
-                    columns.comments
-                    columns.types string:string
-#### A masked pattern was here ####
-                    name default.bucket_small
-                    numFiles 2
-                    numRows 0
-                    partition_columns ds
-                    partition_columns.types string
-                    rawDataSize 0
-                    serialization.ddl struct bucket_small { string key, string value}
-                    serialization.format 1
-                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    totalSize 114
-#### A masked pattern was here ####
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    properties:
-                      SORTBUCKETCOLSPREFIX TRUE
-                      bucket_count 2
-                      bucket_field_name key
-                      columns key,value
-                      columns.comments
-                      columns.types string:string
-#### A masked pattern was here ####
-                      name default.bucket_small
-                      partition_columns ds
-                      partition_columns.types string
-                      serialization.ddl struct bucket_small { string key, string value}
-                      serialization.format 1
-                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    name: default.bucket_small
-                  name: default.bucket_small
-#### A masked pattern was here ####
-                Partition
-                  base file name: ds=2008-04-09
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  partition values:
-                    ds 2008-04-09
-                  properties:
-                    COLUMN_STATS_ACCURATE true
-                    bucket_count 2
-                    bucket_field_name key
-                    columns key,value
-                    columns.comments
-                    columns.types string:string
-#### A masked pattern was here ####
-                    name default.bucket_small
-                    numFiles 2
-                    numRows 0
-                    partition_columns ds
-                    partition_columns.types string
-                    rawDataSize 0
-                    serialization.ddl struct bucket_small { string key, string value}
-                    serialization.format 1
-                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    totalSize 114
-#### A masked pattern was here ####
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    properties:
-                      SORTBUCKETCOLSPREFIX TRUE
-                      bucket_count 2
-                      bucket_field_name key
-                      columns key,value
-                      columns.comments
-                      columns.types string:string
-#### A masked pattern was here ####
-                      name default.bucket_small
-                      partition_columns ds
-                      partition_columns.types string
-                      serialization.ddl struct bucket_small { string key, string value}
-                      serialization.format 1
-                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    name: default.bucket_small
-                  name: default.bucket_small
-            Truncated Path -> Alias:
-              /bucket_small/ds=2008-04-08 [b]
-              /bucket_small/ds=2008-04-09 [b]
-        Map 4
-            Map Operator Tree:
-                TableScan
                  alias: a
                  Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
                  GatherStats: false
@@ -1033,13 +647,26 @@ STAGE PLANS:
                    isSamplingPred: false
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: string)
-                      sort order: +
-                      Map-reduce partition columns: key (type: string)
-                      Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
-                      tag: 0
-                      auto parallelism: true
+                    Sorted Merge Bucket Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      condition expressions:
+                        0
+                        1
+                      keys:
+                        0 key (type: string)
+                        1 key (type: string)
+                      Position of Big Table: 0
+                      Select Operator
+                        Group By Operator
+                          aggregations: count()
+                          mode: hash
+                          outputColumnNames: _col0
+                          Reduce Output Operator
+                            sort order:
+                            tag: -1
+                            value expressions: _col0 (type: bigint)
+                            auto parallelism: false
            Path -> Alias:
#### A masked pattern was here ####
            Path -> Partition:
@@ -1143,46 +770,20 @@ STAGE PLANS:
              /bucket_big/ds=2008-04-08 [a]
              /bucket_big/ds=2008-04-09 [a]
        Reducer 2
-            Needs Tagging: true
-            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                condition expressions:
-                  0
-                  1
-                Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      sort order:
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                      tag: -1
-                      value expressions: _col0 (type: bigint)
-                      auto parallelism: false
-        Reducer 3
            Needs Tagging: false
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: _col0 (type: bigint)
                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
                    GlobalTableId: 0
#### A masked pattern was here ####
                    NumFilesPerFileSink: 1
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
                    table:
                        input format: org.apache.hadoop.mapred.TextInputFormat
diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out
index a7f994f..42c1c4e 100644
--- ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out
+++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out
@@ -64,8 +64,7 @@ STAGE PLANS:
  Stage: Stage-1
    Spark
      Edges:
-        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
-        Reducer 3 <- Reducer 2 (GROUP, 1)
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
      Vertices:
        Map 1
@@ -76,58 +75,34 @@ STAGE PLANS:
                  Filter Operator
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: int)
-                      sort order: +
-                      Map-reduce partition columns: key (type: int)
-                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
-        Map 4
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: int)
-                      sort order: +
-                      Map-reduce partition columns: key (type: int)
-                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                    Sorted Merge Bucket Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      condition expressions:
+                        0
+                        1
+                      keys:
+                        0 key (type: int)
+                        1 key (type: int)
+                      Select Operator
+                        Group By Operator
+                          aggregations: count()
+                          mode: hash
+                          outputColumnNames: _col0
+                          Reduce Output Operator
+                            sort order:
+                            value expressions: _col0 (type: bigint)
        Reducer 2
            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                condition expressions:
-                  0
-                  1
-                Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      sort order:
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: bigint)
-        Reducer 3
-            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: _col0 (type: bigint)
                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                    table:
                        input format: org.apache.hadoop.mapred.TextInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -178,8 +153,7 @@ STAGE PLANS:
  Stage: Stage-1
    Spark
      Edges:
-        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
-        Reducer 3 <- Reducer 2 (GROUP, 1)
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
      Vertices:
        Map 1
@@ -190,65 +164,41 @@ STAGE PLANS:
                  Filter Operator
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: int)
-                      sort order: +
-                      Map-reduce partition columns: key (type: int)
-                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
-        Map 4
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: int)
-                      sort order: +
-                      Map-reduce partition columns: key (type: int)
-                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                    Sorted Merge Bucket Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      condition expressions:
+                        0 {key}
+                        1
+                      keys:
+                        0 key (type: int)
+                        1 key (type: int)
+                      outputColumnNames: _col0
+                      Select Operator
+                        expressions: _col0 (type: int)
+                        outputColumnNames: _col0
+                        Group By Operator
+                          aggregations: count()
+                          keys: _col0 (type: int)
+                          mode: hash
+                          outputColumnNames: _col0, _col1
+                          Reduce Output Operator
+                            key expressions: _col0 (type: int)
+                            sort order: +
+                            Map-reduce partition columns: _col0 (type: int)
+                            value expressions: _col1 (type: bigint)
        Reducer 2
            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                condition expressions:
-                  0 {KEY.reducesinkkey0}
-                  1
-                outputColumnNames: _col0
-                Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: int)
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    aggregations: count()
-                    keys: _col0 (type: int)
-                    mode: hash
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: int)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: int)
-                      Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col1 (type: bigint)
-        Reducer 3
-            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
                keys: KEY._col0 (type: int)
                mode: mergepartial
                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: _col0 (type: int), _col1 (type: bigint)
                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
-                    Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE
                    table:
                        input format: org.apache.hadoop.mapred.TextInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -314,96 +264,67 @@ STAGE PLANS:
  Stage: Stage-1
    Spark
      Edges:
-        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1)
-        Reducer 3 <- Reducer 2 (GROUP, 1)
-        Reducer 4 <- Reducer 3 (GROUP, 1)
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
+        Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
      Vertices:
        Map 1
            Map Operator Tree:
                TableScan
-                  alias: b
-                  Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: int)
-                      sort order: +
-                      Map-reduce partition columns: key (type: int)
-                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
-        Map 5
-            Map Operator Tree:
-                TableScan
                  alias: a
                  Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: int)
-                      sort order: +
-                      Map-reduce partition columns: key (type: int)
-                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                    Sorted Merge Bucket Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      condition expressions:
+                        0 {key}
+                        1
+                      keys:
+                        0 key (type: int)
+                        1 key (type: int)
+                      outputColumnNames: _col0
+                      Select Operator
+                        expressions: _col0 (type: int)
+                        outputColumnNames: _col0
+                        Group By Operator
+                          aggregations: count()
+                          keys: _col0 (type: int)
+                          mode: hash
+                          outputColumnNames: _col0, _col1
+                          Reduce Output Operator
+                            key expressions: _col0 (type: int)
+                            sort order: +
+                            Map-reduce partition columns: _col0 (type: int)
+                            value expressions: _col1 (type: bigint)
        Reducer 2
            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                condition expressions:
-                  0 {KEY.reducesinkkey0}
-                  1
-                outputColumnNames: _col0
-                Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: int)
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    aggregations: count()
-                    keys: _col0 (type: int)
-                    mode: hash
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: int)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: int)
-                      Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col1 (type: bigint)
-        Reducer 3
-            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
                keys: KEY._col0 (type: int)
                mode: mergepartial
                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE
                Select Operator
-                  Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE
                  Group By Operator
                    aggregations: count()
                    mode: hash
                    outputColumnNames: _col0
-                    Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
                    Reduce Output Operator
                      sort order:
-                      Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
                      value expressions: _col0 (type: bigint)
-        Reducer 4
+        Reducer 3
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: _col0 (type: bigint)
                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
-                    Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
                    table:
                        input format: org.apache.hadoop.mapred.TextInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -486,40 +407,43 @@ STAGE PLANS:
  Stage: Stage-1
    Spark
      Edges:
-        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1)
-        Reducer 3 <- Reducer 2 (GROUP, 1)
-        Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 1), Reducer 8 (GROUP PARTITION-LEVEL SORT, 1)
-        Reducer 7 <- Map 6 (GROUP PARTITION-LEVEL SORT, 1), Map 9 (GROUP PARTITION-LEVEL SORT, 1)
-        Reducer 8 <- Reducer 7 (GROUP, 1)
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
+        Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1), Reducer 5 (GROUP PARTITION-LEVEL SORT, 1)
+        Reducer 5 <- Map 4 (GROUP, 1)
#### A masked pattern was here ####
      Vertices:
        Map 1
            Map Operator Tree:
                TableScan
-                  alias: b
-                  Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: int)
-                      sort order: +
-                      Map-reduce partition columns: key (type: int)
-                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
-        Map 5
-            Map Operator Tree:
-                TableScan
                  alias: a
                  Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: int)
-                      sort order: +
-                      Map-reduce partition columns: key (type: int)
-                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
-        Map 6
+                    Sorted Merge Bucket Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      condition expressions:
+                        0 {key}
+                        1
+                      keys:
+                        0 key (type: int)
+                        1 key (type: int)
+                      outputColumnNames: _col0
+                      Select Operator
+                        expressions: _col0 (type: int)
+                        outputColumnNames: _col0
+                        Group By Operator
+                          aggregations: count()
+                          keys: _col0 (type: int)
+                          mode: hash
+                          outputColumnNames: _col0, _col1
+                          Reduce Output Operator
+                            key expressions: _col0 (type: int)
+                            sort order: +
+                            Map-reduce partition columns: _col0 (type: int)
+                            value expressions: _col1 (type: bigint)
+        Map 4
            Map Operator Tree:
                TableScan
                  alias: a
@@ -527,69 +451,45 @@ STAGE PLANS:
                  Filter Operator
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: int)
-                      sort order: +
-                      Map-reduce partition columns: key (type: int)
-                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
-        Map 9
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: int)
-                      sort order: +
-                      Map-reduce partition columns: key (type: int)
-                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                    Sorted Merge Bucket Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      condition expressions:
+                        0 {key}
+                        1
+                      keys:
+                        0 key (type: int)
+                        1 key (type: int)
+                      outputColumnNames: _col0
+                      Select Operator
+                        expressions: _col0 (type: int)
+                        outputColumnNames: _col0
+                        Group By Operator
+                          aggregations: count()
+                          keys: _col0 (type: int)
+                          mode: hash
+                          outputColumnNames: _col0, _col1
+                          Reduce Output Operator
+                            key expressions: _col0 (type: int)
+                            sort order: +
+                            Map-reduce partition columns: _col0 (type: int)
+                            value expressions: _col1 (type: bigint)
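(Note for context, not part of the patch: the auto_sortmerge_join_9 plans in this hunk assume both join sides are bucketed and sorted on the join key, so the shuffle join over two map vertices collapses into a single map vertex carrying the Sorted Merge Bucket Map Join Operator. A minimal sketch of the kind of setup that drives this conversion, with illustrative table names:

    set hive.execution.engine=spark;
    set hive.enforce.bucketing=true;
    set hive.enforce.sorting=true;
    set hive.optimize.bucketmapjoin=true;
    set hive.optimize.bucketmapjoin.sortedmerge=true;
    set hive.auto.convert.sortmerge.join=true;

    create table tbl1 (key int, value string)
      clustered by (key) sorted by (key) into 2 buckets;
    create table tbl2 (key int, value string)
      clustered by (key) sorted by (key) into 2 buckets;

    explain select count(*) from tbl1 a join tbl2 b on a.key = b.key;

Golden files like these are normally regenerated by the q-test driver with output overwrite enabled rather than edited by hand.)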
  Reducer 2
  Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0 {KEY.reducesinkkey0}
- 1
- outputColumnNames: _col0
- Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- keys: _col0 (type: int)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
- Reducer 3
- Reduce Operator Tree:
  Group By Operator
  aggregations: count(VALUE._col0)
  keys: KEY._col0 (type: int)
  mode: mergepartial
  outputColumnNames: _col0, _col1
- Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE
  Select Operator
  expressions: _col0 (type: int), _col1 (type: bigint)
  outputColumnNames: _col0, _col1
- Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE
  Reduce Output Operator
  key expressions: _col0 (type: int)
  sort order: +
  Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE
  value expressions: _col1 (type: bigint)
- Reducer 4
+ Reducer 3
  Reduce Operator Tree:
  Join Operator
  condition map:
@@ -598,61 +498,29 @@ STAGE PLANS:
  0 {KEY.reducesinkkey0} {VALUE._col0}
  1 {VALUE._col0}
  outputColumnNames: _col0, _col1, _col3
- Statistics: Num rows: 5 Data size: 22 Basic stats: COMPLETE Column stats: NONE
  Select Operator
  expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint)
  outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 5 Data size: 22 Basic stats: COMPLETE Column stats: NONE
  File Output Operator
  compressed: false
- Statistics: Num rows: 5 Data size: 22 Basic stats: COMPLETE Column stats: NONE
  table:
  input format: org.apache.hadoop.mapred.TextInputFormat
  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 7
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0 {KEY.reducesinkkey0}
- 1
- outputColumnNames: _col0
- Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- keys: _col0 (type: int)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
- Reducer 8
+ Reducer 5
  Reduce Operator Tree:
  Group By Operator
  aggregations: count(VALUE._col0)
  keys: KEY._col0 (type: int)
  mode: mergepartial
  outputColumnNames: _col0, _col1
- Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE
  Select Operator
  expressions: _col0 (type: int), _col1 (type: bigint)
  outputColumnNames: _col0, _col1
- Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE
  Reduce Output Operator
  key expressions: _col0 (type: int)
  sort order: +
  Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE
  value expressions: _col1 (type: bigint)
  Stage: Stage-0
@@ -731,8 +599,7 @@ STAGE PLANS:
  Stage: Stage-1
  Spark
  Edges:
- Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
- Reducer 3 <- Reducer 2 (GROUP, 1)
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
  #### A masked pattern was here ####
  Vertices:
  Map 1
@@ -747,62 +614,34 @@ STAGE PLANS:
  expressions: key (type: int)
  outputColumnNames: _col0
  Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Map 4
- Map Operator Tree:
- TableScan
- alias: a
- Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((key < 6) and key is not null) (type: boolean)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Select Operator
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: bigint)
  Reducer 2
  Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0
- 1
- Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Reducer 3
- Reduce Operator Tree:
  Group By Operator
  aggregations: count(VALUE._col0)
  mode: mergepartial
  outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
  Select Operator
  expressions: _col0 (type: bigint)
  outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
  File Output Operator
  compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
  table:
  input format: org.apache.hadoop.mapred.TextInputFormat
  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -869,26 +708,12 @@ STAGE PLANS:
  Stage: Stage-1
  Spark
  Edges:
- Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
- Reducer 3 <- Reducer 2 (GROUP, 1)
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
  #### A masked pattern was here ####
  Vertices:
  Map 1
  Map Operator Tree:
  TableScan
- alias: b
- Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
- Map 4
- Map Operator Tree:
- TableScan
  alias: a
  Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
  Filter Operator
@@ -898,45 +723,34 @@ STAGE PLANS:
  expressions: key (type: int)
  outputColumnNames: _col0
  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 _col0 (type: int)
+ 1 key (type: int)
+ Select Operator
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: bigint)
  Reducer 2
  Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0
- 1
- Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Reducer 3
- Reduce Operator Tree:
  Group By Operator
  aggregations: count(VALUE._col0)
  mode: mergepartial
  outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
  Select Operator
  expressions: _col0 (type: bigint)
  outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
  File Output Operator
  compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
  table:
  input format: org.apache.hadoop.mapred.TextInputFormat
  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -1027,8 +841,7 @@ STAGE PLANS:
  Stage: Stage-1
  Spark
  Edges:
- Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
- Reducer 3 <- Reducer 2 (GROUP, 1)
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
  #### A masked pattern was here ####
  Vertices:
  Map 1
@@ -1043,62 +856,34 @@ STAGE PLANS:
  expressions: key (type: int)
  outputColumnNames: _col0
  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
- Map 4
- Map Operator Tree:
- TableScan
- alias: a
- Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (((key < 8) and (key < 6)) and key is not null) (type: boolean)
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Select Operator
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: bigint)
  Reducer 2
  Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0
- 1
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Reducer 3
- Reduce Operator Tree:
  Group By Operator
  aggregations: count(VALUE._col0)
  mode: mergepartial
  outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
  Select Operator
  expressions: _col0 (type: bigint)
  outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
  File Output Operator
  compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
  table:
  input format: org.apache.hadoop.mapred.TextInputFormat
  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -1179,8 +964,7 @@ STAGE PLANS:
  Stage: Stage-1
  Spark
  Edges:
- Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
- Reducer 3 <- Reducer 2 (GROUP, 1)
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
  #### A masked pattern was here ####
  Vertices:
  Map 1
@@ -1195,62 +979,34 @@ STAGE PLANS:
  expressions: key (type: int)
  outputColumnNames: _col0
  Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Map 4
- Map Operator Tree:
- TableScan
- alias: a
- Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((key < 8) and key is not null) (type: boolean)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Select Operator
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: bigint)
  Reducer 2
  Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0
- 1
- Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Reducer 3
- Reduce Operator Tree:
  Group By Operator
  aggregations: count(VALUE._col0)
  mode: mergepartial
  outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
  Select Operator
  expressions: _col0 (type: bigint)
  outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
  File Output Operator
  compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
  table:
  input format: org.apache.hadoop.mapred.TextInputFormat
  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -1308,7 +1064,7 @@ STAGE PLANS:
  Spark
  Edges:
  Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
- Reducer 3 <- Reducer 2 (GROUP, 1)
+ Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1)
  #### A masked pattern was here ####
  Vertices:
  Map 1
@@ -1431,8 +1187,7 @@ STAGE PLANS:
  Stage: Stage-1
  Spark
  Edges:
- Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
- Reducer 3 <- Reducer 2 (GROUP, 1)
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
  #### A masked pattern was here ####
  Vertices:
  Map 1
@@ -1447,58 +1202,34 @@ STAGE PLANS:
  expressions: key (type: int)
  outputColumnNames: _col0
  Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Map 4
- Map Operator Tree:
- TableScan
- alias: a
- Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 _col0 (type: int)
+ 1 key (type: int)
+ Select Operator
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: bigint)
  Reducer 2
  Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0
- 1
- Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Reducer 3
- Reduce Operator Tree:
  Group By Operator
  aggregations: count(VALUE._col0)
  mode: mergepartial
  outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
  Select Operator
  expressions: _col0 (type: bigint)
  outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
  File Output Operator
  compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
  table:
  input format: org.apache.hadoop.mapred.TextInputFormat
  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -1549,8 +1280,7 @@ STAGE PLANS:
  Stage: Stage-1
  Spark
  Edges:
- Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
- Reducer 3 <- Reducer 2 (GROUP, 1)
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
  #### A masked pattern was here ####
  Vertices:
  Map 1
@@ -1559,64 +1289,36 @@ STAGE PLANS:
  alias: a
  Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
  Filter Operator
- predicate: ((key < 6) and key is not null) (type: boolean)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Map 4
- Map Operator Tree:
- TableScan
- alias: a
- Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
  predicate: key is not null (type: boolean)
  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 key (type: int)
+ 1 _col0 (type: int)
+ Select Operator
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: bigint)
  Reducer 2
  Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0
- 1
- Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Reducer 3
- Reduce Operator Tree:
  Group By Operator
  aggregations: count(VALUE._col0)
  mode: mergepartial
  outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
  Select Operator
  expressions: _col0 (type: bigint)
  outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
  File Output Operator
  compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
  table:
  input format: org.apache.hadoop.mapred.TextInputFormat
  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -1677,8 +1379,7 @@ STAGE PLANS:
  Stage: Stage-1
  Spark
  Edges:
- Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1)
- Reducer 3 <- Reducer 2 (GROUP, 1)
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
  #### A masked pattern was here ####
  Vertices:
  Map 1
@@ -1693,81 +1394,37 @@ STAGE PLANS:
  expressions: key (type: int)
  outputColumnNames: _col0
  Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Map 4
- Map Operator Tree:
- TableScan
- alias: a
- Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((key < 6) and key is not null) (type: boolean)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Map 5
- Map Operator Tree:
- TableScan
- alias: a
- Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((key < 6) and key is not null) (type: boolean)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ condition expressions:
+ 0
+ 1
+ 2
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ 2 _col0 (type: int)
+ Select Operator
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: bigint)
  Reducer 2
  Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- Inner Join 0 to 2
- condition expressions:
- 0
- 1
- 2
- Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Reducer 3
- Reduce Operator Tree:
  Group By Operator
  aggregations: count(VALUE._col0)
  mode: mergepartial
  outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
  Select Operator
  expressions: _col0 (type: bigint)
  outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
  File Output Operator
  compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
  table:
  input format: org.apache.hadoop.mapred.TextInputFormat
  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -1842,26 +1499,12 @@ STAGE PLANS:
  Stage: Stage-1
  Spark
  Edges:
- Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
- Reducer 3 <- Reducer 2 (GROUP, 1)
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
  #### A masked pattern was here ####
  Vertices:
  Map 1
  Map Operator Tree:
  TableScan
- alias: b
- Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
- Map 4
- Map Operator Tree:
- TableScan
  alias: a
  Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
  Filter Operator
@@ -1871,45 +1514,34 @@ STAGE PLANS:
  expressions: key (type: int)
  outputColumnNames: _col0
  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 _col0 (type: int)
+ 1 key (type: int)
+ Select Operator
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: bigint)
  Reducer 2
  Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0
- 1
- Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Reducer 3
- Reduce Operator Tree:
  Group By Operator
  aggregations: count(VALUE._col0)
  mode: mergepartial
  outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
  Select Operator
  expressions: _col0 (type: bigint)
  outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
  File Output Operator
  compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
  table:
  input format: org.apache.hadoop.mapred.TextInputFormat
  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -1972,8 +1604,7 @@ STAGE PLANS:
  Stage: Stage-1
  Spark
  Edges:
- Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
- Reducer 3 <- Reducer 2 (GROUP, 1)
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
  #### A masked pattern was here ####
  Vertices:
  Map 1
@@ -1984,58 +1615,34 @@ STAGE PLANS:
  Filter Operator
  predicate: key is not null (type: boolean)
  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
- Map 4
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ Select Operator
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: bigint)
  Reducer 2
  Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0
- 1
- Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Reducer 3
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
  Select Operator
  expressions: _col0 (type: bigint)
  outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
  File Output Operator
  compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
  table:
  input format: org.apache.hadoop.mapred.TextInputFormat
  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -2086,8 +1693,7 @@ STAGE PLANS:
  Stage: Stage-1
  Spark
  Edges:
- Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
- Reducer 3 <- Reducer 2 (GROUP, 1)
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
  #### A masked pattern was here ####
  Vertices:
  Map 1
@@ -2098,65 +1704,41 @@ STAGE PLANS:
  Filter Operator
  predicate: key is not null (type: boolean)
  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key}
+ 1
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ outputColumnNames: _col0
+ Select Operator
+ expressions: _col0 (type: int)
+ outputColumnNames: _col0
+ Group By Operator
+ aggregations: count()
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ value expressions: _col1 (type: bigint)
  Reducer 2
  Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0 {KEY.reducesinkkey0}
- 1
- outputColumnNames: _col0
- Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- keys: _col0 (type: int)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
- Reducer 3
- Reduce Operator Tree:
  Group By Operator
  aggregations: count(VALUE._col0)
  keys: KEY._col0 (type: int)
  mode: mergepartial
  outputColumnNames: _col0, _col1
- Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE
  Select Operator
  expressions: _col0 (type: int), _col1 (type: bigint)
  outputColumnNames: _col0, _col1
- Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE
  File Output Operator
  compressed: false
- Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE
  table:
  input format: org.apache.hadoop.mapred.TextInputFormat
  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -2222,96 +1804,67 @@ STAGE PLANS:
  Stage: Stage-1
  Spark
  Edges:
- Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1)
- Reducer 3 <- Reducer 2 (GROUP, 1)
- Reducer 4 <- Reducer 3 (GROUP, 1)
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
+ Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1)
  #### A masked pattern was here ####
  Vertices:
  Map 1
  Map Operator Tree:
  TableScan
- alias: b
- Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
- Map 5
- Map Operator Tree:
- TableScan
  alias: a
  Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
  Filter Operator
  predicate: key is not null (type: boolean)
  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key}
+ 1
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ outputColumnNames: _col0
+ Select Operator
+ expressions: _col0 (type: int)
+ outputColumnNames: _col0
+ Group By Operator
+ aggregations: count()
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ value expressions: _col1 (type: bigint)
  Reducer 2
  Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0 {KEY.reducesinkkey0}
- 1
- outputColumnNames: _col0
- Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- keys: _col0 (type: int)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
- Reducer 3
- Reduce Operator Tree:
  Group By Operator
  aggregations: count(VALUE._col0)
  keys: KEY._col0 (type: int)
  mode: mergepartial
  outputColumnNames: _col0, _col1
- Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE
  Select Operator
- Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE
  Group By Operator
  aggregations: count()
  mode: hash
  outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
  Reduce Output Operator
  sort order:
- Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
  value expressions: _col0 (type: bigint)
- Reducer 4
+ Reducer 3
  Reduce Operator Tree:
  Group By Operator
  aggregations: count(VALUE._col0)
  mode: mergepartial
  outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
  Select Operator
  expressions: _col0 (type: bigint)
  outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
  File Output Operator
  compressed: false
- Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
  table:
  input format: org.apache.hadoop.mapred.TextInputFormat
  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -2394,40 +1947,43 @@ STAGE PLANS:
  Stage: Stage-1
  Spark
  Edges:
- Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1)
- Reducer 3 <- Reducer 2 (GROUP, 1)
- Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 1), Reducer 8 (GROUP PARTITION-LEVEL SORT, 1)
- Reducer 7 <- Map 6 (GROUP PARTITION-LEVEL SORT, 1), Map 9 (GROUP PARTITION-LEVEL SORT, 1)
- Reducer 8 <- Reducer 7 (GROUP, 1)
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
+ Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1), Reducer 5 (GROUP PARTITION-LEVEL SORT, 1)
+ Reducer 5 <- Map 4 (GROUP, 1)
  #### A masked pattern was here ####
  Vertices:
  Map 1
  Map Operator Tree:
  TableScan
- alias: b
- Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
- Map 5
- Map Operator Tree:
- TableScan
  alias: a
  Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
  Filter Operator
  predicate: key is not null (type: boolean)
  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
- Map 6
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key}
+ 1
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ outputColumnNames: _col0
+ Select Operator
+ expressions: _col0 (type: int)
+ outputColumnNames: _col0
+ Group By Operator
+ aggregations: count()
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ value expressions: _col1 (type: bigint)
+ Map 4
  Map Operator Tree:
  TableScan
  alias: a
@@ -2435,69 +1991,45 @@ STAGE PLANS:
  Filter Operator
  predicate: key is not null (type: boolean)
  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
- Map 9
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key}
+ 1
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ outputColumnNames: _col0
+ Select Operator
+ expressions: _col0 (type: int)
+ outputColumnNames: _col0
+ Group By Operator
+ aggregations: count()
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ value expressions: _col1 (type: bigint)
  Reducer 2
  Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0 {KEY.reducesinkkey0}
- 1
- outputColumnNames: _col0
- Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- keys: _col0 (type: int)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
- Reducer 3
- Reduce Operator Tree:
  Group By Operator
  aggregations: count(VALUE._col0)
  keys: KEY._col0 (type: int)
  mode: mergepartial
  outputColumnNames: _col0, _col1
- Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE
  Select Operator
  expressions: _col0 (type: int), _col1 (type: bigint)
  outputColumnNames: _col0, _col1
- Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE
  Reduce Output Operator
  key expressions: _col0 (type: int)
  sort order: +
  Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE
  value expressions: _col1 (type: bigint)
- Reducer 4
+ Reducer 3
  Reduce Operator Tree:
  Join Operator
  condition map:
@@ -2506,61 +2038,29 @@ STAGE PLANS:
  0 {KEY.reducesinkkey0} {VALUE._col0}
  1 {VALUE._col0}
  outputColumnNames: _col0, _col1, _col3
- Statistics: Num rows: 5 Data size: 22 Basic stats: COMPLETE Column stats: NONE
  Select Operator
  expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint)
  outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 5 Data size: 22 Basic stats: COMPLETE Column stats: NONE
  File Output Operator
  compressed: false
- Statistics: Num rows: 5 Data size: 22 Basic stats: COMPLETE Column stats: NONE
  table:
  input format: org.apache.hadoop.mapred.TextInputFormat
  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 7
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0 {KEY.reducesinkkey0}
- 1
- outputColumnNames: _col0
- Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- keys: _col0 (type: int)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
- Reducer 8
+ Reducer 5
  Reduce Operator Tree:
  Group By Operator
  aggregations: count(VALUE._col0)
  keys: KEY._col0 (type: int)
  mode: mergepartial
  outputColumnNames: _col0, _col1
- Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE
  Select Operator
  expressions: _col0 (type: int), _col1 (type: bigint)
  outputColumnNames: _col0, _col1
- Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE
  Reduce Output Operator
  key expressions: _col0 (type: int)
  sort order: +
  Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE
  value expressions: _col1 (type: bigint)
  Stage: Stage-0
@@ -2639,8 +2139,7 @@ STAGE PLANS:
  Stage: Stage-1
  Spark
  Edges:
- Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
- Reducer 3 <- Reducer 2 (GROUP, 1)
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
  #### A masked pattern was here ####
  Vertices:
  Map 1
@@ -2655,62 +2154,34 @@ STAGE PLANS:
  expressions: key (type: int)
  outputColumnNames: _col0
  Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Map 4
- Map Operator Tree:
- TableScan
- alias: a
- Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((key < 6) and key is not null) (type: boolean)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Select Operator
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: bigint)
  Reducer 2
  Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0
- 1
- Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Reducer 3
- Reduce Operator Tree:
  Group By Operator
  aggregations: count(VALUE._col0)
  mode: mergepartial
  outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
  Select Operator
  expressions: _col0 (type: bigint)
  outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
  File Output Operator
  compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
  table:
  input format: org.apache.hadoop.mapred.TextInputFormat
  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -2777,26 +2248,12 @@ STAGE PLANS:
  Stage: Stage-1
  Spark
  Edges:
- Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
- Reducer 3 <- Reducer 2 (GROUP, 1)
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
  #### A masked pattern was here ####
  Vertices:
  Map 1
  Map Operator Tree:
  TableScan
- alias: b
- Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
- Map 4
- Map Operator Tree:
- TableScan
  alias: a
  Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
  Filter Operator
@@ -2806,45 +2263,34 @@ STAGE PLANS:
  expressions: key (type: int)
  outputColumnNames: _col0
  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 _col0 (type: int)
+ 1 key (type: int)
+ Select Operator
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: bigint)
  Reducer 2
  Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0
- 1
- Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Reducer 3
- Reduce Operator Tree:
  Group By Operator
  aggregations: count(VALUE._col0)
  mode: mergepartial
  outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
  Select Operator
  expressions: _col0 (type: bigint)
  outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
  File Output Operator
  compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
  table:
  input format: org.apache.hadoop.mapred.TextInputFormat
  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -2935,8 +2381,7 @@ STAGE PLANS:
  Stage: Stage-1
  Spark
  Edges:
- Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
- Reducer 3 <- Reducer 2 (GROUP, 1)
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
  #### A masked pattern was here ####
  Vertices:
  Map 1
@@ -2951,62 +2396,34 @@ STAGE PLANS:
  expressions: key (type: int)
  outputColumnNames: _col0
  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
- Map 4
- Map Operator Tree:
- TableScan
- alias: a
- Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (((key < 8) and (key < 6)) and key is not null) (type: boolean)
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Select Operator
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: bigint)
  Reducer 2
  Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0
- 1
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Reducer 3
- Reduce Operator Tree:
  Group By Operator
  aggregations: count(VALUE._col0)
  mode: mergepartial
  outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
  Select Operator
  expressions: _col0 (type: bigint)
  outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
  File Output Operator
  compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
  table:
  input format: org.apache.hadoop.mapred.TextInputFormat
  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -3087,8 +2504,7 @@ STAGE PLANS:
  Stage: Stage-1
  Spark
  Edges:
- Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
- Reducer 3 <- Reducer 2 (GROUP, 1)
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
  #### A masked pattern was here ####
  Vertices:
  Map 1
@@ -3103,62 +2519,34 @@ STAGE PLANS:
  expressions: key (type: int)
  outputColumnNames: _col0
  Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Map 4
- Map Operator Tree:
- TableScan
- alias: a
- Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((key < 8) and key is not null) (type: boolean)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Select Operator
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: bigint)
  Reducer 2
  Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0
- 1
- Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Reducer 3
- Reduce Operator Tree:
  Group By Operator
  aggregations: count(VALUE._col0)
  mode: mergepartial
  outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
  Select Operator
  expressions: _col0 (type: bigint)
  outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
  File Output Operator
  compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
  table:
  input format: org.apache.hadoop.mapred.TextInputFormat
  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -3211,8 +2599,7 @@ STAGE PLANS:
  Stage: Stage-1
  Spark
  Edges:
- Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
- Reducer 3 <- Reducer 2 (GROUP, 1)
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
  #### A masked pattern was here ####
  Vertices:
  Map 1
@@ -3227,58 +2614,34 @@ STAGE PLANS:
  expressions: key (type: int)
  outputColumnNames: _col0
  Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Map 4
- Map Operator Tree:
- TableScan
- alias: a
- Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 _col0 (type: int)
+ 1 key (type: int)
+ Select Operator
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: bigint)
  Reducer 2
  Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0
- 1
- Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Reducer 3
- Reduce Operator Tree:
  Group By Operator
  aggregations: count(VALUE._col0)
  mode: mergepartial
  outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
  Select Operator
  expressions: _col0 (type: bigint)
  outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
  File Output Operator
  compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
  table:
  input format: org.apache.hadoop.mapred.TextInputFormat
  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -3329,8 +2692,7 @@ STAGE PLANS:
  Stage: Stage-1
  Spark
  Edges:
- Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
- Reducer 3 <- Reducer 2 (GROUP, 1)
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
  #### A masked pattern was here ####
  Vertices:
  Map 1
@@ -3339,64 +2701,36 @@ STAGE PLANS:
  alias: a
  Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
  Filter Operator
- predicate: ((key < 6) and key is not null) (type: boolean)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Map 4
- Map Operator Tree:
- TableScan
- alias: a
- Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
  predicate: key is not null (type: boolean)
  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 key (type: int)
+ 1 _col0 (type: int)
+ Select Operator
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: bigint)
  Reducer 2
  Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0
- 1
- Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Reducer 3
- Reduce Operator Tree:
  Group By Operator
  aggregations: count(VALUE._col0)
  mode: mergepartial
  outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
  Select Operator
  expressions: _col0 (type: bigint)
  outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
  File Output Operator
  compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
  table:
  input format: org.apache.hadoop.mapred.TextInputFormat
  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -3457,8 +2791,7 @@ STAGE PLANS:
  Stage: Stage-1
  Spark
  Edges:
- Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1)
- Reducer 3 <- Reducer 2 (GROUP, 1)
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
  #### A masked pattern was here ####
  Vertices:
  Map 1
@@ -3473,81 +2806,37 @@ STAGE PLANS:
  expressions: key (type: int)
  outputColumnNames: _col0
  Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Map 4
- Map Operator Tree:
- TableScan
- alias: a
- Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((key < 6) and key is not null) (type: boolean)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Map 5
- Map Operator Tree:
- TableScan
- alias: a
- Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((key < 6) and key is not null) (type: boolean)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ condition expressions:
+ 0
+ 1
+ 2
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ 2 _col0 (type: int)
+ Select Operator
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: bigint)
  Reducer 2
  Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- Inner Join 0 to 2
- condition expressions:
- 0
- 1
- 2
- Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Reducer 3
- Reduce Operator Tree:
  Group By Operator
  aggregations: count(VALUE._col0)
  mode: mergepartial
  outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
  Select Operator
  expressions: _col0 (type: bigint)
  outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
  File Output Operator
  compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
  table:
  input format: org.apache.hadoop.mapred.TextInputFormat
  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -3622,26 +2911,12 @@ STAGE PLANS:
  Stage: Stage-1
  Spark
  Edges:
- Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
- Reducer 3 <- Reducer 2 (GROUP, 1)
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
  #### A masked pattern was here ####
  Vertices:
  Map 1
  Map Operator Tree:
  TableScan
- alias: b
- Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is
not null (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Map 4 - Map Operator Tree: - TableScan alias: a Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -3651,45 +2926,34 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) + Select Operator + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 - Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/spark/bucket2.q.out ql/src/test/results/clientpositive/spark/bucket2.q.out index b1b2997..f2b2527 100644 --- ql/src/test/results/clientpositive/spark/bucket2.q.out +++ ql/src/test/results/clientpositive/spark/bucket2.q.out @@ -45,7 +45,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/bucket3.q.out ql/src/test/results/clientpositive/spark/bucket3.q.out index 019c11a..1beaa4b 100644 --- ql/src/test/results/clientpositive/spark/bucket3.q.out +++ ql/src/test/results/clientpositive/spark/bucket3.q.out @@ -49,7 +49,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/bucket4.q.out ql/src/test/results/clientpositive/spark/bucket4.q.out index 2cbab11..707172c 100644 --- 
ql/src/test/results/clientpositive/spark/bucket4.q.out +++ ql/src/test/results/clientpositive/spark/bucket4.q.out @@ -41,7 +41,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/bucket_map_join_1.q.out ql/src/test/results/clientpositive/spark/bucket_map_join_1.q.out index 4ec619e..8e0b0bd 100644 --- ql/src/test/results/clientpositive/spark/bucket_map_join_1.q.out +++ ql/src/test/results/clientpositive/spark/bucket_map_join_1.q.out @@ -112,7 +112,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/bucket_map_join_2.q.out ql/src/test/results/clientpositive/spark/bucket_map_join_2.q.out index 1c288c2..7e73aa6 100644 --- ql/src/test/results/clientpositive/spark/bucket_map_join_2.q.out +++ ql/src/test/results/clientpositive/spark/bucket_map_join_2.q.out @@ -112,7 +112,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin10.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin10.q.out index 8be3edd..a3100a8 100644 --- ql/src/test/results/clientpositive/spark/bucketmapjoin10.q.out +++ ql/src/test/results/clientpositive/spark/bucketmapjoin10.q.out @@ -200,7 +200,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin11.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin11.q.out index 9e45843..5605822 100644 --- ql/src/test/results/clientpositive/spark/bucketmapjoin11.q.out +++ ql/src/test/results/clientpositive/spark/bucketmapjoin11.q.out @@ -210,7 +210,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -613,7 +613,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin12.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin12.q.out index 0c1ac4b..80747a9 100644 --- ql/src/test/results/clientpositive/spark/bucketmapjoin12.q.out +++ ql/src/test/results/clientpositive/spark/bucketmapjoin12.q.out @@ -169,7 +169,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -465,7 +465,7 @@ STAGE PLANS: 
Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin13.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin13.q.out index dc1b8cf..b666a92 100644 --- ql/src/test/results/clientpositive/spark/bucketmapjoin13.q.out +++ ql/src/test/results/clientpositive/spark/bucketmapjoin13.q.out @@ -131,7 +131,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -471,7 +471,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -762,7 +762,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1055,7 +1055,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin8.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin8.q.out index 6d72fdf..b013022 100644 --- ql/src/test/results/clientpositive/spark/bucketmapjoin8.q.out +++ ql/src/test/results/clientpositive/spark/bucketmapjoin8.q.out @@ -134,7 +134,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -439,7 +439,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin9.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin9.q.out index d80bdcf..dd27a8b 100644 --- ql/src/test/results/clientpositive/spark/bucketmapjoin9.q.out +++ ql/src/test/results/clientpositive/spark/bucketmapjoin9.q.out @@ -142,7 +142,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -480,7 +480,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/count.q.out ql/src/test/results/clientpositive/spark/count.q.out index c527c1d..89d5544 100644 --- 
ql/src/test/results/clientpositive/spark/count.q.out +++ ql/src/test/results/clientpositive/spark/count.q.out @@ -41,7 +41,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -115,7 +115,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -184,7 +184,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -252,7 +252,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/ctas.q.out ql/src/test/results/clientpositive/spark/ctas.q.out index 0ded266..d526fc5 100644 --- ql/src/test/results/clientpositive/spark/ctas.q.out +++ ql/src/test/results/clientpositive/spark/ctas.q.out @@ -33,8 +33,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -182,8 +182,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -331,8 +331,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -545,8 +545,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -737,8 +737,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/disable_merge_for_bucketing.q.out ql/src/test/results/clientpositive/spark/disable_merge_for_bucketing.q.out index 590b265..c4b56ac 100644 --- ql/src/test/results/clientpositive/spark/disable_merge_for_bucketing.q.out +++ ql/src/test/results/clientpositive/spark/disable_merge_for_bucketing.q.out @@ -41,7 +41,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -204,7 +204,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/escape_clusterby1.q.out 
ql/src/test/results/clientpositive/spark/escape_clusterby1.q.out index 52bdf6a..8c73946 100644 --- ql/src/test/results/clientpositive/spark/escape_clusterby1.q.out +++ ql/src/test/results/clientpositive/spark/escape_clusterby1.q.out @@ -14,7 +14,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -65,7 +65,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/escape_distributeby1.q.out ql/src/test/results/clientpositive/spark/escape_distributeby1.q.out index 736db5e..008f9e8 100644 --- ql/src/test/results/clientpositive/spark/escape_distributeby1.q.out +++ ql/src/test/results/clientpositive/spark/escape_distributeby1.q.out @@ -14,7 +14,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -65,7 +65,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/escape_orderby1.q.out ql/src/test/results/clientpositive/spark/escape_orderby1.q.out index 6e1c0cf..bf10d0d 100644 --- ql/src/test/results/clientpositive/spark/escape_orderby1.q.out +++ ql/src/test/results/clientpositive/spark/escape_orderby1.q.out @@ -14,7 +14,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -64,7 +64,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/escape_sortby1.q.out ql/src/test/results/clientpositive/spark/escape_sortby1.q.out index 58b663c..6324e1d 100644 --- ql/src/test/results/clientpositive/spark/escape_sortby1.q.out +++ ql/src/test/results/clientpositive/spark/escape_sortby1.q.out @@ -14,7 +14,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -64,7 +64,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/groupby1.q.out ql/src/test/results/clientpositive/spark/groupby1.q.out index 847f45c..8e54ce3 100644 --- ql/src/test/results/clientpositive/spark/groupby1.q.out +++ ql/src/test/results/clientpositive/spark/groupby1.q.out @@ -26,8 +26,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/groupby10.q.out ql/src/test/results/clientpositive/spark/groupby10.q.out index 2095843..ea7964f 100644 --- ql/src/test/results/clientpositive/spark/groupby10.q.out +++ 
ql/src/test/results/clientpositive/spark/groupby10.q.out @@ -56,9 +56,9 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 4 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -270,9 +270,9 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 4 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -486,9 +486,9 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 4 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/groupby11.q.out ql/src/test/results/clientpositive/spark/groupby11.q.out index 70db5a5..1251ebe 100644 --- ql/src/test/results/clientpositive/spark/groupby11.q.out +++ ql/src/test/results/clientpositive/spark/groupby11.q.out @@ -44,9 +44,9 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 4 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/groupby2.q.out ql/src/test/results/clientpositive/spark/groupby2.q.out index 86e2f2a..2da49c5 100644 --- ql/src/test/results/clientpositive/spark/groupby2.q.out +++ ql/src/test/results/clientpositive/spark/groupby2.q.out @@ -24,7 +24,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/groupby3.q.out ql/src/test/results/clientpositive/spark/groupby3.q.out index 13a5fab..e371f41 100644 --- ql/src/test/results/clientpositive/spark/groupby3.q.out +++ ql/src/test/results/clientpositive/spark/groupby3.q.out @@ -42,8 +42,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/groupby3_map.q.out ql/src/test/results/clientpositive/spark/groupby3_map.q.out index dac2824..babc8a2 100644 --- ql/src/test/results/clientpositive/spark/groupby3_map.q.out +++ ql/src/test/results/clientpositive/spark/groupby3_map.q.out @@ -42,7 +42,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/groupby3_map_multi_distinct.q.out 
ql/src/test/results/clientpositive/spark/groupby3_map_multi_distinct.q.out index d2c054a..fa2aca3 100644 --- ql/src/test/results/clientpositive/spark/groupby3_map_multi_distinct.q.out +++ ql/src/test/results/clientpositive/spark/groupby3_map_multi_distinct.q.out @@ -46,7 +46,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/groupby3_map_skew.q.out ql/src/test/results/clientpositive/spark/groupby3_map_skew.q.out index ec6439a..c2997cd 100644 --- ql/src/test/results/clientpositive/spark/groupby3_map_skew.q.out +++ ql/src/test/results/clientpositive/spark/groupby3_map_skew.q.out @@ -42,8 +42,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 31) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -160,4 +160,4 @@ POSTHOOK: query: SELECT c1, c2, c3, c4, c5, c6, c7, ROUND(c8, 5), ROUND(c9, 5) F POSTHOOK: type: QUERY POSTHOOK: Input: default@dest1 #### A masked pattern was here #### -130091.0 260.182 256.10355987055016 98.0 0.0 142.92680950752379 143.06995106518903 20428.07288 20469.0109 +130091.0 260.182 256.10355987055016 98.0 0.0 142.9268095075238 143.06995106518906 20428.07288 20469.0109 diff --git ql/src/test/results/clientpositive/spark/groupby3_noskew.q.out ql/src/test/results/clientpositive/spark/groupby3_noskew.q.out index 0c9a7e1..897c387 100644 --- ql/src/test/results/clientpositive/spark/groupby3_noskew.q.out +++ ql/src/test/results/clientpositive/spark/groupby3_noskew.q.out @@ -42,7 +42,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/groupby3_noskew_multi_distinct.q.out ql/src/test/results/clientpositive/spark/groupby3_noskew_multi_distinct.q.out index 42fbb8c..c38c44a 100644 --- ql/src/test/results/clientpositive/spark/groupby3_noskew_multi_distinct.q.out +++ ql/src/test/results/clientpositive/spark/groupby3_noskew_multi_distinct.q.out @@ -46,7 +46,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/groupby4.q.out ql/src/test/results/clientpositive/spark/groupby4.q.out index 318c5a3..2586907 100644 --- ql/src/test/results/clientpositive/spark/groupby4.q.out +++ ql/src/test/results/clientpositive/spark/groupby4.q.out @@ -28,8 +28,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/groupby7_map.q.out ql/src/test/results/clientpositive/spark/groupby7_map.q.out index 22a05b5..c950180 100644 --- ql/src/test/results/clientpositive/spark/groupby7_map.q.out +++ ql/src/test/results/clientpositive/spark/groupby7_map.q.out @@ -40,8 +40,8 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 31) - Reducer 3 <- Map 1 (GROUP, 31) + Reducer 2 <- Map 1 (GROUP 
PARTITION-LEVEL SORT, 31) + Reducer 3 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/groupby7_map_multi_single_reducer.q.out ql/src/test/results/clientpositive/spark/groupby7_map_multi_single_reducer.q.out index bc453c6..977f28a 100644 --- ql/src/test/results/clientpositive/spark/groupby7_map_multi_single_reducer.q.out +++ ql/src/test/results/clientpositive/spark/groupby7_map_multi_single_reducer.q.out @@ -40,7 +40,7 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 31) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/groupby7_map_skew.q.out ql/src/test/results/clientpositive/spark/groupby7_map_skew.q.out index 2a07f2a..6a84dbe 100644 --- ql/src/test/results/clientpositive/spark/groupby7_map_skew.q.out +++ ql/src/test/results/clientpositive/spark/groupby7_map_skew.q.out @@ -40,10 +40,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 31) - Reducer 3 <- Reducer 2 (GROUP, 31) - Reducer 4 <- Map 1 (GROUP SORT, 31) - Reducer 5 <- Reducer 4 (GROUP, 31) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 4 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 5 <- Reducer 4 (GROUP PARTITION-LEVEL SORT, 31) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/groupby7_noskew.q.out ql/src/test/results/clientpositive/spark/groupby7_noskew.q.out index 00a0707..ec4a6c4 100644 --- ql/src/test/results/clientpositive/spark/groupby7_noskew.q.out +++ ql/src/test/results/clientpositive/spark/groupby7_noskew.q.out @@ -40,8 +40,8 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 31) - Reducer 3 <- Map 1 (GROUP, 31) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 3 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/groupby7_noskew_multi_single_reducer.q.out ql/src/test/results/clientpositive/spark/groupby7_noskew_multi_single_reducer.q.out index 36640ef..9aa7b16 100644 --- ql/src/test/results/clientpositive/spark/groupby7_noskew_multi_single_reducer.q.out +++ ql/src/test/results/clientpositive/spark/groupby7_noskew_multi_single_reducer.q.out @@ -40,9 +40,9 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 31) - Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 4 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/groupby8.q.out ql/src/test/results/clientpositive/spark/groupby8.q.out index d8295ce..0ef1e22 100644 --- ql/src/test/results/clientpositive/spark/groupby8.q.out +++ ql/src/test/results/clientpositive/spark/groupby8.q.out @@ -40,9 +40,9 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 4 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -830,9 +830,9 @@ STAGE 
PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 4 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/groupby8_map.q.out ql/src/test/results/clientpositive/spark/groupby8_map.q.out index b9aa597..c2826b2 100644 --- ql/src/test/results/clientpositive/spark/groupby8_map.q.out +++ ql/src/test/results/clientpositive/spark/groupby8_map.q.out @@ -40,9 +40,9 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 31) - Reducer 3 <- Reducer 2 (GROUP, 31) - Reducer 4 <- Reducer 2 (GROUP, 31) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 4 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 31) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/groupby8_map_skew.q.out ql/src/test/results/clientpositive/spark/groupby8_map_skew.q.out index b9aa597..c2826b2 100644 --- ql/src/test/results/clientpositive/spark/groupby8_map_skew.q.out +++ ql/src/test/results/clientpositive/spark/groupby8_map_skew.q.out @@ -40,9 +40,9 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 31) - Reducer 3 <- Reducer 2 (GROUP, 31) - Reducer 4 <- Reducer 2 (GROUP, 31) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 4 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 31) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/groupby8_noskew.q.out ql/src/test/results/clientpositive/spark/groupby8_noskew.q.out index b9aa597..c2826b2 100644 --- ql/src/test/results/clientpositive/spark/groupby8_noskew.q.out +++ ql/src/test/results/clientpositive/spark/groupby8_noskew.q.out @@ -40,9 +40,9 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 31) - Reducer 3 <- Reducer 2 (GROUP, 31) - Reducer 4 <- Reducer 2 (GROUP, 31) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 4 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 31) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/groupby9.q.out ql/src/test/results/clientpositive/spark/groupby9.q.out index bec2346..97ed7d7 100644 --- ql/src/test/results/clientpositive/spark/groupby9.q.out +++ ql/src/test/results/clientpositive/spark/groupby9.q.out @@ -40,9 +40,9 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 4 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -831,9 +831,9 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 4 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1622,9 +1622,9 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - 
Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 4 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2413,8 +2413,8 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3201,9 +3201,9 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 4 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/groupby_complex_types.q.out ql/src/test/results/clientpositive/spark/groupby_complex_types.q.out index 16fadea..c4cfa3c 100644 --- ql/src/test/results/clientpositive/spark/groupby_complex_types.q.out +++ ql/src/test/results/clientpositive/spark/groupby_complex_types.q.out @@ -52,9 +52,9 @@ STAGE PLANS: Stage: Stage-3 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Map 1 (GROUP, 1) - Reducer 4 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/groupby_complex_types_multi_single_reducer.q.out ql/src/test/results/clientpositive/spark/groupby_complex_types_multi_single_reducer.q.out index 7470843..e10cff1 100644 --- ql/src/test/results/clientpositive/spark/groupby_complex_types_multi_single_reducer.q.out +++ ql/src/test/results/clientpositive/spark/groupby_complex_types_multi_single_reducer.q.out @@ -40,10 +40,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 4 <- Map 1 (GROUP, 1) - Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 5 <- Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -213,16 +213,16 @@ POSTHOOK: query: SELECT DEST1.* FROM DEST1 POSTHOOK: type: QUERY POSTHOOK: Input: default@dest1 #### A masked pattern was here #### +["0"] 3 +["10"] 1 +["100"] 2 +["103"] 2 +["104"] 2 ["105"] 1 -["145"] 1 -["181"] 1 -["195"] 2 -["244"] 1 -["335"] 1 -["336"] 1 -["4"] 1 -["421"] 1 -["83"] 2 +["11"] 1 +["111"] 1 +["113"] 2 +["114"] 1 PREHOOK: query: SELECT DEST2.* FROM DEST2 PREHOOK: type: QUERY PREHOOK: Input: default@dest2 @@ -231,13 +231,13 @@ POSTHOOK: query: SELECT DEST2.* FROM DEST2 POSTHOOK: type: QUERY POSTHOOK: Input: default@dest2 #### A masked pattern was here #### -{"156":"val_156"} 1 -{"170":"val_170"} 1 -{"186":"val_186"} 1 -{"274":"val_274"} 1 -{"332":"val_332"} 1 -{"402":"val_402"} 1 -{"480":"val_480"} 3 -{"5":"val_5"} 3 -{"67":"val_67"} 2 -{"96":"val_96"} 1 +{"0":"val_0"} 3 +{"10":"val_10"} 1 +{"100":"val_100"} 2 +{"103":"val_103"} 2 +{"104":"val_104"} 2 +{"105":"val_105"} 1 +{"11":"val_11"} 1 +{"111":"val_111"} 1 +{"113":"val_113"} 
2 +{"114":"val_114"} 1 diff --git ql/src/test/results/clientpositive/spark/groupby_cube1.q.out ql/src/test/results/clientpositive/spark/groupby_cube1.q.out index 169c4ac..edec4c7 100644 --- ql/src/test/results/clientpositive/spark/groupby_cube1.q.out +++ ql/src/test/results/clientpositive/spark/groupby_cube1.q.out @@ -32,7 +32,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -122,7 +122,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -199,8 +199,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -304,7 +304,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -405,10 +405,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) - Reducer 4 <- Map 1 (GROUP SORT, 1) - Reducer 5 <- Reducer 4 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 5 <- Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/groupby_multi_insert_common_distinct.q.out ql/src/test/results/clientpositive/spark/groupby_multi_insert_common_distinct.q.out index d3457da..0d173b2 100644 --- ql/src/test/results/clientpositive/spark/groupby_multi_insert_common_distinct.q.out +++ ql/src/test/results/clientpositive/spark/groupby_multi_insert_common_distinct.q.out @@ -40,9 +40,9 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 4 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -226,8 +226,8 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer.q.out ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer.q.out index 3abd0e3..63976f4 100644 --- ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer.q.out +++ ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer.q.out @@ -68,7 +68,7 @@ STAGE PLANS: Stage: Stage-3 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -307,9 +307,9 @@ STAGE PLANS: Stage: Stage-5 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Map 1 (GROUP SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + 
Reducer 3 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer2.q.out ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer2.q.out index 7f74c62..a8a35a0 100644 --- ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer2.q.out +++ ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer2.q.out @@ -36,7 +36,7 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer3.q.out ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer3.q.out index c4b7419..4543077 100644 --- ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer3.q.out +++ ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer3.q.out @@ -50,7 +50,7 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -232,7 +232,7 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -414,7 +414,7 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -596,7 +596,7 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/groupby_position.q.out ql/src/test/results/clientpositive/spark/groupby_position.q.out index 9e58189..5d182f5 100644 --- ql/src/test/results/clientpositive/spark/groupby_position.q.out +++ ql/src/test/results/clientpositive/spark/groupby_position.q.out @@ -40,8 +40,8 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -240,8 +240,8 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -440,8 +440,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -570,9 +570,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1), Reducer 6 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 3 (GROUP SORT, 1) + Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 1) Reducer 6 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: diff --git 
ql/src/test/results/clientpositive/spark/groupby_ppr.q.out ql/src/test/results/clientpositive/spark/groupby_ppr.q.out index 860aa58..b5c6427 100644 --- ql/src/test/results/clientpositive/spark/groupby_ppr.q.out +++ ql/src/test/results/clientpositive/spark/groupby_ppr.q.out @@ -101,7 +101,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/groupby_rollup1.q.out ql/src/test/results/clientpositive/spark/groupby_rollup1.q.out index 0aeff6b..a5f2b0d 100644 --- ql/src/test/results/clientpositive/spark/groupby_rollup1.q.out +++ ql/src/test/results/clientpositive/spark/groupby_rollup1.q.out @@ -32,7 +32,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -116,7 +116,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -193,8 +193,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -292,7 +292,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -393,10 +393,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) - Reducer 4 <- Map 1 (GROUP SORT, 1) - Reducer 5 <- Reducer 4 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 5 <- Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out index 61dd2be..5d5955b 100644 --- ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out +++ ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out @@ -304,7 +304,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1211,7 +1211,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1443,7 +1443,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1711,7 +1711,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2370,7 +2370,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 4 <- Map 3 (GROUP, 1) + Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 1) Union 2 <- Map 1 (NONE, 0), Reducer 4 (NONE, 0) #### A masked pattern was here #### Vertices: @@ -3470,7 +3470,7 @@ STAGE 
PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -4680,7 +4680,7 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 31) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) #### A masked pattern was here #### Vertices: Map 1 @@ -4849,7 +4849,7 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 31) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out index 99da734..0579a17 100644 --- ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out +++ ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out @@ -304,8 +304,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1229,8 +1229,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1479,8 +1479,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1765,8 +1765,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2442,8 +2442,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 4 <- Map 3 (GROUP SORT, 1) - Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 5 <- Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) Union 2 <- Map 1 (NONE, 0), Reducer 5 (NONE, 0) #### A masked pattern was here #### Vertices: @@ -3578,8 +3578,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -4806,8 +4806,8 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 31) - Reducer 3 <- Reducer 2 (GROUP, 31) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 31) #### A masked pattern was here #### Vertices: Map 1 @@ -4990,8 +4990,8 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 31) - Reducer 3 <- Reducer 2 (GROUP, 31) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 31) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/having.q.out ql/src/test/results/clientpositive/spark/having.q.out index 5e9f20d..2551b17 100644 --- ql/src/test/results/clientpositive/spark/having.q.out +++ 
ql/src/test/results/clientpositive/spark/having.q.out @@ -12,7 +12,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -95,7 +95,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -476,7 +476,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -748,7 +748,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -949,7 +949,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1221,7 +1221,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/input14.q.out ql/src/test/results/clientpositive/spark/input14.q.out index e7d4db6..76dd278 100644 --- ql/src/test/results/clientpositive/spark/input14.q.out +++ ql/src/test/results/clientpositive/spark/input14.q.out @@ -34,7 +34,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/input17.q.out ql/src/test/results/clientpositive/spark/input17.q.out index 0882a29..eda38b5 100644 --- ql/src/test/results/clientpositive/spark/input17.q.out +++ ql/src/test/results/clientpositive/spark/input17.q.out @@ -34,7 +34,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/input18.q.out ql/src/test/results/clientpositive/spark/input18.q.out index 802fb0a..bb252aa 100644 --- ql/src/test/results/clientpositive/spark/input18.q.out +++ ql/src/test/results/clientpositive/spark/input18.q.out @@ -34,7 +34,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/input1_limit.q.out ql/src/test/results/clientpositive/spark/input1_limit.q.out index 33ecd07..92cd24f 100644 --- ql/src/test/results/clientpositive/spark/input1_limit.q.out +++ ql/src/test/results/clientpositive/spark/input1_limit.q.out @@ -40,8 +40,8 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/insert_into1.q.out ql/src/test/results/clientpositive/spark/insert_into1.q.out index e9be658..b21d422 100644 --- ql/src/test/results/clientpositive/spark/insert_into1.q.out +++ ql/src/test/results/clientpositive/spark/insert_into1.q.out @@ -24,7 +24,7 @@ STAGE PLANS: Stage: Stage-1 
Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -119,7 +119,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -186,7 +186,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -281,7 +281,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -348,7 +348,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -443,7 +443,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/insert_into2.q.out ql/src/test/results/clientpositive/spark/insert_into2.q.out index 5c8e9c7..82bfec1 100644 --- ql/src/test/results/clientpositive/spark/insert_into2.q.out +++ ql/src/test/results/clientpositive/spark/insert_into2.q.out @@ -28,7 +28,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -112,7 +112,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -192,7 +192,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -278,7 +278,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -381,7 +381,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -452,7 +452,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -555,7 +555,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/insert_into3.q.out ql/src/test/results/clientpositive/spark/insert_into3.q.out index 6c0111d..3d47ddc 100644 --- ql/src/test/results/clientpositive/spark/insert_into3.q.out +++ ql/src/test/results/clientpositive/spark/insert_into3.q.out @@ -40,8 +40,8 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -199,8 +199,8 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP 
PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/join0.q.out ql/src/test/results/clientpositive/spark/join0.q.out index 55b725e..c000a3e 100644 --- ql/src/test/results/clientpositive/spark/join0.q.out +++ ql/src/test/results/clientpositive/spark/join0.q.out @@ -23,7 +23,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/join15.q.out ql/src/test/results/clientpositive/spark/join15.q.out index 1651db1..151eb2e 100644 --- ql/src/test/results/clientpositive/spark/join15.q.out +++ ql/src/test/results/clientpositive/spark/join15.q.out @@ -13,7 +13,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/join18.q.out ql/src/test/results/clientpositive/spark/join18.q.out index 7b64fb6..7ed794d 100644 --- ql/src/test/results/clientpositive/spark/join18.q.out +++ ql/src/test/results/clientpositive/spark/join18.q.out @@ -32,7 +32,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1), Reducer 5 (GROUP PARTITION-LEVEL SORT, 1) Reducer 5 <- Map 4 (GROUP, 1) #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/spark/join18_multi_distinct.q.out ql/src/test/results/clientpositive/spark/join18_multi_distinct.q.out index 57c4516..17ccc4b 100644 --- ql/src/test/results/clientpositive/spark/join18_multi_distinct.q.out +++ ql/src/test/results/clientpositive/spark/join18_multi_distinct.q.out @@ -38,7 +38,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1), Reducer 5 (GROUP PARTITION-LEVEL SORT, 1) Reducer 5 <- Map 4 (GROUP, 1) #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/spark/join20.q.out ql/src/test/results/clientpositive/spark/join20.q.out index f06ffac..8ad4fbf 100644 --- ql/src/test/results/clientpositive/spark/join20.q.out +++ ql/src/test/results/clientpositive/spark/join20.q.out @@ -15,7 +15,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -676,7 +676,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/join21.q.out ql/src/test/results/clientpositive/spark/join21.q.out index e81ec5a..57e0dd7 100644 --- 
ql/src/test/results/clientpositive/spark/join21.q.out +++ ql/src/test/results/clientpositive/spark/join21.q.out @@ -13,7 +13,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/join23.q.out ql/src/test/results/clientpositive/spark/join23.q.out index 3982ea7..d27e81c 100644 --- ql/src/test/results/clientpositive/spark/join23.q.out +++ ql/src/test/results/clientpositive/spark/join23.q.out @@ -13,7 +13,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/join29.q.out ql/src/test/results/clientpositive/spark/join29.q.out index d5383d5..03a8704 100644 --- ql/src/test/results/clientpositive/spark/join29.q.out +++ ql/src/test/results/clientpositive/spark/join29.q.out @@ -36,7 +36,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1), Reducer 5 (GROUP PARTITION-LEVEL SORT, 1) Reducer 5 <- Map 4 (GROUP, 1) #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/spark/join30.q.out ql/src/test/results/clientpositive/spark/join30.q.out index 5c16622..1d5b99d 100644 --- ql/src/test/results/clientpositive/spark/join30.q.out +++ ql/src/test/results/clientpositive/spark/join30.q.out @@ -29,7 +29,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/join31.q.out ql/src/test/results/clientpositive/spark/join31.q.out index 9193df9..01d0d75 100644 --- ql/src/test/results/clientpositive/spark/join31.q.out +++ ql/src/test/results/clientpositive/spark/join31.q.out @@ -38,9 +38,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1), Reducer 6 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 1) Reducer 6 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: diff --git ql/src/test/results/clientpositive/spark/join35.q.out ql/src/test/results/clientpositive/spark/join35.q.out index 1750aec..80c38fb 100644 --- ql/src/test/results/clientpositive/spark/join35.q.out +++ ql/src/test/results/clientpositive/spark/join35.q.out @@ -156,7 +156,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 1), Union 3 (GROUP PARTITION-LEVEL SORT, 1) Reducer 6 <- Map 5 (GROUP, 1) Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) diff --git ql/src/test/results/clientpositive/spark/join38.q.out ql/src/test/results/clientpositive/spark/join38.q.out index cef8a84..8f45f76 100644 --- 
ql/src/test/results/clientpositive/spark/join38.q.out +++ ql/src/test/results/clientpositive/spark/join38.q.out @@ -58,7 +58,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/join_merge_multi_expressions.q.out ql/src/test/results/clientpositive/spark/join_merge_multi_expressions.q.out index 8e924be..995eb51 100644 --- ql/src/test/results/clientpositive/spark/join_merge_multi_expressions.q.out +++ ql/src/test/results/clientpositive/spark/join_merge_multi_expressions.q.out @@ -13,7 +13,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/join_vc.q.out ql/src/test/results/clientpositive/spark/join_vc.q.out index 6e34ef3..54e6daf 100644 --- ql/src/test/results/clientpositive/spark/join_vc.q.out +++ ql/src/test/results/clientpositive/spark/join_vc.q.out @@ -15,7 +15,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 5 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) Reducer 5 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 6 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: diff --git ql/src/test/results/clientpositive/spark/limit_pushdown.q.out ql/src/test/results/clientpositive/spark/limit_pushdown.q.out index 94b38f7..59a0619 100644 --- ql/src/test/results/clientpositive/spark/limit_pushdown.q.out +++ ql/src/test/results/clientpositive/spark/limit_pushdown.q.out @@ -20,7 +20,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -103,7 +103,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -186,7 +186,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -284,7 +284,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -382,7 +382,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -475,7 +475,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -572,7 +572,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -695,8 +695,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 2 <- 
Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -813,7 +813,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1), Reducer 6 (GROUP PARTITION-LEVEL SORT, 1) Reducer 5 <- Map 4 (GROUP, 1) Reducer 6 <- Reducer 5 (GROUP, 1) @@ -972,7 +972,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1029,6 +1029,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### val_0 0.0 +val_10 10.0 val_100 200.0 val_103 206.0 val_104 208.0 @@ -1037,17 +1038,16 @@ val_11 11.0 val_111 111.0 val_113 226.0 val_114 114.0 +val_116 116.0 val_118 236.0 val_119 357.0 val_12 24.0 val_120 240.0 val_125 250.0 val_126 126.0 +val_128 384.0 val_129 258.0 val_131 131.0 -val_138 414.0 -val_15 15.0 -val_193 193.0 PREHOOK: query: -- flush for order-by explain select key,value,value,value,value,value,value,value,value from src order by key limit 100 @@ -1064,7 +1064,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1229,7 +1229,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1304,50 +1304,50 @@ POSTHOOK: Input: default@src 155.0 156.0 157.0 +158.0 +160.0 162.0 163.0 166.0 168.0 17.0 +170.0 177.0 178.0 180.0 181.0 +183.0 +186.0 189.0 19.0 +190.0 192.0 194.0 196.0 20.0 200.0 201.0 +202.0 206.0 208.0 214.0 218.0 +222.0 226.0 226.0 228.0 235.0 236.0 24.0 -24.0 240.0 -242.0 250.0 -255.0 +258.0 268.0 -272.0 -273.0 274.0 -278.0 -282.0 292.0 298.0 30.0 304.0 -316.0 -327.0 328.0 330.0 344.0 @@ -1355,6 +1355,7 @@ POSTHOOK: Input: default@src 350.0 352.0 357.0 +358.0 36.0 382.0 384.0 @@ -1363,25 +1364,24 @@ POSTHOOK: Input: default@src 400.0 406.0 410.0 -417.0 +414.0 418.0 426.0 -430.0 432.0 434.0 438.0 442.0 446.0 -459.0 +448.0 +458.0 466.0 474.0 +476.0 478.0 -489.0 501.0 552.0 -554.0 +561.0 579.0 597.0 -622.0 624.0 676.0 diff --git ql/src/test/results/clientpositive/spark/load_dyn_part14.q.out ql/src/test/results/clientpositive/spark/load_dyn_part14.q.out index 5dd5fad..1912b2a 100644 --- ql/src/test/results/clientpositive/spark/load_dyn_part14.q.out +++ ql/src/test/results/clientpositive/spark/load_dyn_part14.q.out @@ -63,9 +63,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 5 <- Map 4 (GROUP, 1) - Reducer 7 <- Map 6 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 5 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 7 <- Map 6 (GROUP PARTITION-LEVEL SORT, 1) Union 3 <- Reducer 2 (NONE, 0), Reducer 5 (NONE, 0), Reducer 7 (NONE, 0) #### A masked pattern was here #### Vertices: diff --git ql/src/test/results/clientpositive/spark/load_dyn_part2.q.out ql/src/test/results/clientpositive/spark/load_dyn_part2.q.out index f8f8971..43a6c8c 100644 --- ql/src/test/results/clientpositive/spark/load_dyn_part2.q.out +++ ql/src/test/results/clientpositive/spark/load_dyn_part2.q.out @@ -48,7 +48,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 10) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL 
SORT, 10) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/mapjoin_distinct.q.out ql/src/test/results/clientpositive/spark/mapjoin_distinct.q.out index 9f66974..c5bbabf 100644 --- ql/src/test/results/clientpositive/spark/mapjoin_distinct.q.out +++ ql/src/test/results/clientpositive/spark/mapjoin_distinct.q.out @@ -19,8 +19,8 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -160,7 +160,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -288,8 +288,8 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -424,7 +424,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/mapjoin_filter_on_outerjoin.q.out ql/src/test/results/clientpositive/spark/mapjoin_filter_on_outerjoin.q.out index e89ad85..10537ce 100644 --- ql/src/test/results/clientpositive/spark/mapjoin_filter_on_outerjoin.q.out +++ ql/src/test/results/clientpositive/spark/mapjoin_filter_on_outerjoin.q.out @@ -61,7 +61,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -210,7 +210,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out index 1801d13..3e7de45 100644 --- ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out +++ ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out @@ -583,7 +583,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 6 (GROUP PARTITION-LEVEL SORT, 1) Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/mapjoin_test_outer.q.out ql/src/test/results/clientpositive/spark/mapjoin_test_outer.q.out index 19d03a0..a0e043c 100644 --- 
ql/src/test/results/clientpositive/spark/mapjoin_test_outer.q.out +++ ql/src/test/results/clientpositive/spark/mapjoin_test_outer.q.out @@ -255,7 +255,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1081,7 +1081,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/mapreduce1.q.out ql/src/test/results/clientpositive/spark/mapreduce1.q.out index 1824126..940c1a3 100644 --- ql/src/test/results/clientpositive/spark/mapreduce1.q.out +++ ql/src/test/results/clientpositive/spark/mapreduce1.q.out @@ -32,7 +32,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/mapreduce2.q.out ql/src/test/results/clientpositive/spark/mapreduce2.q.out index 792a0c8..fe0623f 100644 --- ql/src/test/results/clientpositive/spark/mapreduce2.q.out +++ ql/src/test/results/clientpositive/spark/mapreduce2.q.out @@ -30,7 +30,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/merge1.q.out ql/src/test/results/clientpositive/spark/merge1.q.out index c50a80b..cced1d4 100644 --- ql/src/test/results/clientpositive/spark/merge1.q.out +++ ql/src/test/results/clientpositive/spark/merge1.q.out @@ -33,7 +33,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/merge2.q.out ql/src/test/results/clientpositive/spark/merge2.q.out index aec97a3..40a90ec 100644 --- ql/src/test/results/clientpositive/spark/merge2.q.out +++ ql/src/test/results/clientpositive/spark/merge2.q.out @@ -33,7 +33,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out index 7e32dd6..0383276 100644 --- ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out +++ ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out @@ -190,7 +190,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -250,7 +250,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -446,7 +446,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked 
pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/multi_insert.q.out ql/src/test/results/clientpositive/spark/multi_insert.q.out index 2b9f90e..8744ffb 100644 --- ql/src/test/results/clientpositive/spark/multi_insert.q.out +++ ql/src/test/results/clientpositive/spark/multi_insert.q.out @@ -612,7 +612,7 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -776,7 +776,7 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -940,7 +940,7 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1104,7 +1104,7 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/multi_insert_gby.q.out ql/src/test/results/clientpositive/spark/multi_insert_gby.q.out index 7d6d58b..2c5fa3d 100644 --- ql/src/test/results/clientpositive/spark/multi_insert_gby.q.out +++ ql/src/test/results/clientpositive/spark/multi_insert_gby.q.out @@ -44,7 +44,7 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -241,7 +241,7 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/multi_insert_gby2.q.out ql/src/test/results/clientpositive/spark/multi_insert_gby2.q.out index fca3e1d..8b01f8b 100644 --- ql/src/test/results/clientpositive/spark/multi_insert_gby2.q.out +++ ql/src/test/results/clientpositive/spark/multi_insert_gby2.q.out @@ -42,8 +42,8 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out index ce78fba..01a5511 100644 --- ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out +++ ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out @@ -48,8 +48,8 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -174,8 +174,8 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1618,8 +1618,8 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + 
Reducer 3 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1759,8 +1759,8 @@ STAGE PLANS: Stage: Stage-3 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/multi_insert_lateral_view.q.out ql/src/test/results/clientpositive/spark/multi_insert_lateral_view.q.out index bca846a..68b8dc7 100644 --- ql/src/test/results/clientpositive/spark/multi_insert_lateral_view.q.out +++ ql/src/test/results/clientpositive/spark/multi_insert_lateral_view.q.out @@ -299,8 +299,8 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -572,8 +572,8 @@ STAGE PLANS: Stage: Stage-3 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -865,9 +865,9 @@ STAGE PLANS: Stage: Stage-3 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Map 1 (GROUP SORT, 1) - Reducer 4 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1232,9 +1232,9 @@ STAGE PLANS: Stage: Stage-4 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Map 1 (GROUP SORT, 1) - Reducer 4 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/multi_insert_mixed.q.out ql/src/test/results/clientpositive/spark/multi_insert_mixed.q.out index 819b265..bfa495e 100644 --- ql/src/test/results/clientpositive/spark/multi_insert_mixed.q.out +++ ql/src/test/results/clientpositive/spark/multi_insert_mixed.q.out @@ -54,10 +54,10 @@ STAGE PLANS: Stage: Stage-3 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) - Reducer 4 <- Map 1 (GROUP, 1) - Reducer 5 <- Reducer 4 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 5 <- Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/multi_insert_move_tasks_share_dependencies.q.out ql/src/test/results/clientpositive/spark/multi_insert_move_tasks_share_dependencies.q.out index 7e768e4..2ae5390 100644 --- ql/src/test/results/clientpositive/spark/multi_insert_move_tasks_share_dependencies.q.out +++ ql/src/test/results/clientpositive/spark/multi_insert_move_tasks_share_dependencies.q.out @@ -612,7 +612,7 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -776,7 +776,7 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - 
Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -940,7 +940,7 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1104,7 +1104,7 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2441,9 +2441,9 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) - Reducer 4 <- Reducer 2 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2575,9 +2575,9 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) - Reducer 4 <- Reducer 2 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2709,9 +2709,9 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) - Reducer 4 <- Reducer 2 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2843,9 +2843,9 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) - Reducer 4 <- Reducer 2 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2985,9 +2985,9 @@ STAGE PLANS: Stage: Stage-4 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) - Reducer 4 <- Reducer 2 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3232,9 +3232,9 @@ STAGE PLANS: Stage: Stage-4 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) - Reducer 4 <- Reducer 2 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3479,9 +3479,9 @@ STAGE PLANS: Stage: Stage-4 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) - Reducer 4 <- Reducer 2 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3726,9 +3726,9 @@ STAGE PLANS: Stage: Stage-4 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) - Reducer 4 <- Reducer 2 (GROUP SORT, 1) + Reducer 2 <- 
Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/multigroupby_singlemr.q.out ql/src/test/results/clientpositive/spark/multigroupby_singlemr.q.out index 2d0c4d7..a594808 100644 --- ql/src/test/results/clientpositive/spark/multigroupby_singlemr.q.out +++ ql/src/test/results/clientpositive/spark/multigroupby_singlemr.q.out @@ -60,8 +60,8 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -193,8 +193,8 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -326,8 +326,8 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -459,7 +459,7 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -573,9 +573,9 @@ STAGE PLANS: Stage: Stage-3 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Map 1 (GROUP, 1) - Reducer 4 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out index d9de8d9..3e6737c 100644 --- ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out +++ ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out @@ -97,7 +97,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -649,8 +649,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 5 <- Map 4 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 5 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1) Union 3 <- Reducer 2 (NONE, 0), Reducer 5 (NONE, 0) #### A masked pattern was here #### Vertices: diff --git ql/src/test/results/clientpositive/spark/order.q.out ql/src/test/results/clientpositive/spark/order.q.out index 3c5b169..5362e64 100644 --- ql/src/test/results/clientpositive/spark/order.q.out +++ ql/src/test/results/clientpositive/spark/order.q.out @@ -12,7 +12,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -84,7 +84,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/order2.q.out 
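Unlike the relabelings that dominate this section, the parquet_join.q.out hunk a few files below changes the plan itself: the Edges section, the second map vertex, and the reduce-side Join Operator are deleted, and Map 1 instead carries a Sorted Merge Bucket Map Join Operator. The join can run map-side because both inputs are bucketed and sorted on the join key, so each task can stream-merge a pair of matching buckets in one forward pass with no shuffle at all. A minimal sketch of that merge, assuming two bucket files already sorted by an integer key (toy types and names, not Hive's SMBMapJoinOperator, which handles far more cases):

```java
import java.util.ArrayList;
import java.util.List;

/** Toy inner sorted-merge join over two key-sorted buckets. */
public class SortedMergeJoinSketch {

  static final class Row {
    final int key;
    final String value;
    Row(int key, String value) { this.key = key; this.value = value; }
  }

  static List<String> join(List<Row> left, List<Row> right) {
    List<String> out = new ArrayList<>();
    int i = 0, j = 0;
    while (i < left.size() && j < right.size()) {
      int l = left.get(i).key, r = right.get(j).key;
      if (l < r) {
        i++;                       // advance the side with the smaller key
      } else if (l > r) {
        j++;
      } else {
        int j0 = j;                // start of the equal-key run on the right
        for (; i < left.size() && left.get(i).key == l; i++) {
          // emit the cross product of the two equal-key runs
          for (j = j0; j < right.size() && right.get(j).key == l; j++) {
            out.add(left.get(i).value + "\t" + right.get(j).value);
          }
        }
      }
    }
    return out;
  }
}
```

Because each bucket pair is merged independently, one task per big-table bucket suffices, which is why the explain output above needs no Edges at all for this query.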
ql/src/test/results/clientpositive/spark/order2.q.out index 8999399..f46288e 100644 --- ql/src/test/results/clientpositive/spark/order2.q.out +++ ql/src/test/results/clientpositive/spark/order2.q.out @@ -16,7 +16,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/parallel.q.out ql/src/test/results/clientpositive/spark/parallel.q.out index 32d7ff1..983f8ab 100644 --- ql/src/test/results/clientpositive/spark/parallel.q.out +++ ql/src/test/results/clientpositive/spark/parallel.q.out @@ -40,8 +40,8 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/parquet_join.q.out ql/src/test/results/clientpositive/spark/parquet_join.q.out index d5a8684..a5f49f0 100644 --- ql/src/test/results/clientpositive/spark/parquet_join.q.out +++ ql/src/test/results/clientpositive/spark/parquet_join.q.out @@ -293,8 +293,6 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark - Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -305,47 +303,25 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 3 Data size: 315 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 3 Data size: 315 Basic stats: COMPLETE Column stats: NONE - value expressions: value2 (type: string) - Map 3 - Map Operator Tree: - TableScan - alias: p1 - Statistics: Num rows: 3 Data size: 334 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} - 1 {VALUE._col1} - outputColumnNames: _col1, _col7 - Statistics: Num rows: 3 Data size: 346 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col7 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 346 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 346 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {value} + 1 {value2} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col1, _col7 + Select Operator + expressions: _col1 (type: string), _col7 (type: string) + outputColumnNames: _col0, _col1 + File Output 
Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/pcr.q.out ql/src/test/results/clientpositive/spark/pcr.q.out index 4e9244f..e10da04 100644 --- ql/src/test/results/clientpositive/spark/pcr.q.out +++ ql/src/test/results/clientpositive/spark/pcr.q.out @@ -103,7 +103,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -326,7 +326,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -643,7 +643,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -884,7 +884,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1127,7 +1127,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1426,7 +1426,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1725,7 +1725,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1940,7 +1940,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2191,7 +2191,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2526,7 +2526,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2757,7 +2757,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3065,7 +3065,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3392,7 +3392,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3761,7 +3761,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -4638,7 
+4638,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -4823,7 +4823,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -5052,7 +5052,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/ppd_gby_join.q.out ql/src/test/results/clientpositive/spark/ppd_gby_join.q.out index c5c34c1..b126b94 100644 --- ql/src/test/results/clientpositive/spark/ppd_gby_join.q.out +++ ql/src/test/results/clientpositive/spark/ppd_gby_join.q.out @@ -27,7 +27,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -155,7 +155,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/ppd_transform.q.out ql/src/test/results/clientpositive/spark/ppd_transform.q.out index 282277a..5916bb7 100644 --- ql/src/test/results/clientpositive/spark/ppd_transform.q.out +++ ql/src/test/results/clientpositive/spark/ppd_transform.q.out @@ -24,7 +24,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -206,7 +206,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/sample10.q.out ql/src/test/results/clientpositive/spark/sample10.q.out index c511152..ce79525 100644 --- ql/src/test/results/clientpositive/spark/sample10.q.out +++ ql/src/test/results/clientpositive/spark/sample10.q.out @@ -90,8 +90,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/sample6.q.out ql/src/test/results/clientpositive/spark/sample6.q.out index f6256f5..8ccc0bc 100644 --- ql/src/test/results/clientpositive/spark/sample6.q.out +++ ql/src/test/results/clientpositive/spark/sample6.q.out @@ -488,7 +488,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -891,7 +891,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1548,7 +1548,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A 
masked pattern was here #### Vertices: Map 1 @@ -2048,7 +2048,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2534,7 +2534,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2867,7 +2867,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3077,7 +3077,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/script_pipe.q.out ql/src/test/results/clientpositive/spark/script_pipe.q.out index 5b966ff..1a1d8e6 100644 --- ql/src/test/results/clientpositive/spark/script_pipe.q.out +++ ql/src/test/results/clientpositive/spark/script_pipe.q.out @@ -12,7 +12,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/semijoin.q.out ql/src/test/results/clientpositive/spark/semijoin.q.out index 18fc837..84d7aa3 100644 --- ql/src/test/results/clientpositive/spark/semijoin.q.out +++ ql/src/test/results/clientpositive/spark/semijoin.q.out @@ -130,7 +130,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -236,7 +236,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -344,7 +344,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -444,7 +444,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -555,7 +555,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -658,7 +658,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -761,7 +761,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP 
PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -861,7 +861,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -966,7 +966,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1084,7 +1084,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1188,7 +1188,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1321,7 +1321,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1434,7 +1434,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1573,7 +1573,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1706,7 +1706,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1842,7 +1842,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1978,7 +1978,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2114,7 +2114,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2252,7 +2252,7 @@ STAGE PLANS: Spark 
Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2402,7 +2402,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 6 (GROUP PARTITION-LEVEL SORT, 1) Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 3 (GROUP SORT, 1) + Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/skewjoin.q.out ql/src/test/results/clientpositive/spark/skewjoin.q.out index d674d04..3551ad6 100644 --- ql/src/test/results/clientpositive/spark/skewjoin.q.out +++ ql/src/test/results/clientpositive/spark/skewjoin.q.out @@ -456,7 +456,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -569,7 +569,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -697,7 +697,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -833,7 +833,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -978,7 +978,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/skewjoin_noskew.q.out ql/src/test/results/clientpositive/spark/skewjoin_noskew.q.out index d45cdd3..9690e51 100644 --- ql/src/test/results/clientpositive/spark/skewjoin_noskew.q.out +++ ql/src/test/results/clientpositive/spark/skewjoin_noskew.q.out @@ -16,7 +16,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/skewjoinopt1.q.out ql/src/test/results/clientpositive/spark/skewjoinopt1.q.out index 47ebe96..86cbc66 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt1.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt1.q.out @@ -241,7 +241,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -341,7 +341,7 @@ 
STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/skewjoinopt15.q.out ql/src/test/results/clientpositive/spark/skewjoinopt15.q.out index e197185..d4b63c5 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt15.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt15.q.out @@ -283,7 +283,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -383,7 +383,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/skewjoinopt9.q.out ql/src/test/results/clientpositive/spark/skewjoinopt9.q.out index 8188487..4ee46c9 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt9.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt9.q.out @@ -202,7 +202,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) Reducer 3 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_13.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_13.q.out index 0319137..69ec22b 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_13.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_13.q.out @@ -133,7 +133,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -421,7 +421,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_14.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_14.q.out index cad4063..11558df 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_14.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_14.q.out @@ -61,7 +61,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -179,8 +179,8 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 4 <- Reducer 3 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -329,8 +329,8 @@ STAGE PLANS: Spark 
Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -481,7 +481,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -619,7 +619,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -777,7 +777,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -929,7 +929,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1057,7 +1057,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1181,7 +1181,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1297,7 +1297,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1423,7 +1423,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1588,7 +1588,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_15.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_15.q.out index 7849e78..15dc920 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_15.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_15.q.out @@ -103,7 +103,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -455,7 +455,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- 
Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -753,7 +753,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1051,7 +1051,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_16.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_16.q.out index 11ffefd..ae3d0fc 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_16.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_16.q.out @@ -53,7 +53,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_17.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_17.q.out index 482268c..b3e9511 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_17.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_17.q.out @@ -197,7 +197,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1), Map 6 (GROUP PARTITION-LEVEL SORT, 1), Map 7 (GROUP PARTITION-LEVEL SORT, 1), Map 8 (GROUP PARTITION-LEVEL SORT, 1), Map 9 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -409,124 +409,57 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1), Map 6 (GROUP PARTITION-LEVEL SORT, 1), Map 7 (GROUP PARTITION-LEVEL SORT, 1), Map 8 (GROUP PARTITION-LEVEL SORT, 1), Map 9 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: f - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Map 4 - Map Operator Tree: - TableScan - alias: g - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Map 5 - Map Operator Tree: - TableScan - alias: d - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: 
key (type: int) - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Map 6 - Map Operator Tree: - TableScan - alias: e - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Map 7 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Map 8 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Map 9 - Map Operator Tree: - TableScan alias: a Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Left Outer Join0 to 1 + Left Outer Join0 to 2 + Left Outer Join0 to 3 + Left Outer Join0 to 4 + Left Outer Join0 to 5 + Left Outer Join0 to 6 + condition expressions: + 0 + 1 + 2 + 3 + 4 + 5 + 6 + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + 3 key (type: int) + 4 key (type: int) + 5 key (type: int) + 6 key (type: int) + Select Operator + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - Left Outer Join0 to 2 - Left Outer Join0 to 3 - Left Outer Join0 to 4 - Left Outer Join0 to 5 - Left Outer Join0 to 6 - condition expressions: - 0 - 1 - 2 - 3 - 4 - 5 - 6 - Statistics: Num rows: 132 Data size: 528 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 132 Data size: 528 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 - Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -599,136 +532,60 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 
10 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1), Map 6 (GROUP PARTITION-LEVEL SORT, 1), Map 7 (GROUP PARTITION-LEVEL SORT, 1), Map 8 (GROUP PARTITION-LEVEL SORT, 1), Map 9 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: f - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Map 10 - Map Operator Tree: - TableScan - alias: h - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Map 4 - Map Operator Tree: - TableScan - alias: g - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Map 5 - Map Operator Tree: - TableScan - alias: d - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Map 6 - Map Operator Tree: - TableScan - alias: e - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Map 7 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Map 8 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Map 9 - Map Operator Tree: - TableScan alias: a Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Left Outer Join0 to 1 + Left Outer Join0 to 2 + Left Outer Join0 to 3 + Left Outer Join0 to 4 + Left Outer Join0 to 5 + Left Outer Join0 to 6 + Left Outer Join0 to 7 + condition expressions: + 0 + 1 + 2 + 3 + 4 + 5 + 6 + 7 + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + 3 key (type: int) + 4 key (type: int) + 5 key (type: int) + 6 key (type: int) + 7 key 
(type: int) + Select Operator + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - Left Outer Join0 to 2 - Left Outer Join0 to 3 - Left Outer Join0 to 4 - Left Outer Join0 to 5 - Left Outer Join0 to 6 - Left Outer Join0 to 7 - condition expressions: - 0 - 1 - 2 - 3 - 4 - 5 - 6 - 7 - Statistics: Num rows: 154 Data size: 616 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 154 Data size: 616 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 - Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -833,101 +690,72 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 10 (GROUP PARTITION-LEVEL SORT, 1), Map 11 (GROUP PARTITION-LEVEL SORT, 1), Map 12 (GROUP PARTITION-LEVEL SORT, 1), Map 13 (GROUP PARTITION-LEVEL SORT, 1), Map 14 (GROUP PARTITION-LEVEL SORT, 1), Map 15 (GROUP PARTITION-LEVEL SORT, 1), Map 16 (GROUP PARTITION-LEVEL SORT, 1), Map 17 (GROUP PARTITION-LEVEL SORT, 1), Map 22 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1), Map 6 (GROUP PARTITION-LEVEL SORT, 1), Map 7 (GROUP PARTITION-LEVEL SORT, 1), Map 8 (GROUP PARTITION-LEVEL SORT, 1), Map 9 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 18 (GROUP PARTITION-LEVEL SORT, 1), Map 19 (GROUP PARTITION-LEVEL SORT, 1), Map 20 (GROUP PARTITION-LEVEL SORT, 1), Map 21 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1), Map 6 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: f - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Map 10 - Map Operator Tree: - TableScan - alias: n - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Map 11 - Map Operator Tree: - TableScan - alias: o - Statistics: Num rows: 20 
Data size: 80 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Map 12 - Map Operator Tree: - TableScan - alias: l - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Map 13 - Map Operator Tree: - TableScan - alias: m - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Map 14 - Map Operator Tree: - TableScan - alias: j - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Map 15 - Map Operator Tree: - TableScan - alias: k - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Map 16 - Map Operator Tree: - TableScan - alias: h - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Map 17 - Map Operator Tree: - TableScan - alias: i - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Map 18 + alias: a + Statistics: Num rows: 0 Data size: 80 Basic stats: PARTIAL Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Left Outer Join0 to 1 + Left Outer Join0 to 2 + Left Outer Join0 to 3 + Left Outer Join0 to 4 + Left Outer Join0 to 5 + Left Outer Join0 to 6 + Left Outer Join0 to 7 + Left Outer Join0 to 8 + Left Outer Join0 to 9 + Left Outer Join0 to 10 + Left Outer Join0 to 11 + Left Outer Join0 to 12 + Left Outer Join0 to 13 + Left Outer Join0 to 14 + Left Outer Join0 to 15 + condition expressions: + 0 {key} {value} + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12 + 13 + 14 + 15 + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + 3 key (type: int) + 4 key (type: int) + 5 key (type: int) + 6 key (type: int) + 7 key (type: int) + 8 key (type: int) + 9 key (type: int) + 10 key (type: int) + 11 key (type: int) + 12 key (type: int) + 13 key (type: int) + 14 key (type: int) + 15 key (type: int) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: string) + Map 3 Map Operator Tree: TableScan alias: t @@ -937,50 +765,10 @@ STAGE 
PLANS: sort order: + Map-reduce partition columns: key (type: int) Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Map 19 - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Map 20 - Map Operator Tree: - TableScan - alias: r - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Map 21 - Map Operator Tree: - TableScan - alias: q - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Map 22 - Map Operator Tree: - TableScan - alias: p - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE Map 4 Map Operator Tree: TableScan - alias: g + alias: s Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) @@ -990,7 +778,7 @@ STAGE PLANS: Map 5 Map Operator Tree: TableScan - alias: d + alias: r Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) @@ -1000,44 +788,13 @@ STAGE PLANS: Map 6 Map Operator Tree: TableScan - alias: e - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Map 7 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Map 8 - Map Operator Tree: - TableScan - alias: c + alias: q Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Map 9 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 0 Data size: 80 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 0 Data size: 80 Basic stats: PARTIAL Column stats: NONE - value expressions: value (type: string) Reducer 2 Reduce Operator Tree: Join Operator @@ -1046,50 +803,6 @@ STAGE PLANS: Left Outer Join0 to 2 Left Outer Join0 to 3 Left Outer Join0 to 4 - Left Outer Join0 to 5 - Left Outer Join0 to 6 - 
Left Outer Join0 to 7 - Left Outer Join0 to 8 - Left Outer Join0 to 9 - Left Outer Join0 to 10 - Left Outer Join0 to 11 - Left Outer Join0 to 12 - Left Outer Join0 to 13 - Left Outer Join0 to 14 - Left Outer Join0 to 15 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 - 2 - 3 - 4 - 5 - 6 - 7 - 8 - 9 - 10 - 11 - 12 - 13 - 14 - 15 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 330 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 330 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - Left Outer Join0 to 2 - Left Outer Join0 to 3 - Left Outer Join0 to 4 condition expressions: 0 {KEY.reducesinkkey0} {VALUE._col0} 1 @@ -1097,14 +810,11 @@ STAGE PLANS: 3 4 outputColumnNames: _col0, _col1 - Statistics: Num rows: 1452 Data size: 5808 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1452 Data size: 5808 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1452 Data size: 5808 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_20.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_20.q.out index 292f596..2d887d7 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_20.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_20.q.out @@ -55,7 +55,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -315,7 +315,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_21.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_21.q.out index 8bc5dd6..4cf0784 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_21.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_21.q.out @@ -132,7 +132,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -221,7 +221,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -310,7 +310,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -399,7 +399,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -488,7 +488,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### 
Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_25.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_25.q.out index efa38d4..674854f 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_25.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_25.q.out @@ -204,48 +204,34 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1), Reducer 6 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 6 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1), Map 7 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 51 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and (key = 5)) (type: boolean) - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: 5 (type: int) - sort order: + - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE - Map 4 - Map Operator Tree: - TableScan alias: a Statistics: Num rows: 52 Data size: 208 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key is not null and (key = 5)) (type: boolean) Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: 5 (type: int) - sort order: + - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE - Map 5 - Map Operator Tree: - TableScan - alias: d - Statistics: Num rows: 55 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and (key = 5)) (type: boolean) - Statistics: Num rows: 14 Data size: 56 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: 5 (type: int) - sort order: + - Statistics: Num rows: 14 Data size: 56 Basic stats: COMPLETE Column stats: NONE - Map 7 + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 5 (type: int) + 1 5 (type: int) + Select Operator + expressions: 5 (type: int) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Map 3 Map Operator Tree: TableScan alias: c @@ -253,70 +239,42 @@ STAGE PLANS: Filter Operator predicate: (key is not null and (key = 5)) (type: boolean) Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: 5 (type: int) - sort order: + - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 5 (type: int) + 1 5 (type: int) + Select Operator + expressions: 5 (type: int) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Reducer 2 Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 14 Data size: 57 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 5 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 14 Data 
size: 57 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 14 Data size: 57 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: Left Outer Join0 to 1 condition expressions: 0 {KEY.reducesinkkey0} 1 {KEY.reducesinkkey0} outputColumnNames: _col0, _col1 - Statistics: Num rows: 16 Data size: 67 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col1 = 5) (type: boolean) - Statistics: Num rows: 8 Data size: 33 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), 5 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 33 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 33 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 15 Data size: 61 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 5 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 15 Data size: 61 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 15 Data size: 61 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/sort.q.out ql/src/test/results/clientpositive/spark/sort.q.out index 04f6c32..7563ff0 100644 --- ql/src/test/results/clientpositive/spark/sort.q.out +++ ql/src/test/results/clientpositive/spark/sort.q.out @@ -12,7 +12,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/sort_merge_join_desc_1.q.out ql/src/test/results/clientpositive/spark/sort_merge_join_desc_1.q.out index 32c3818..438aa49 100644 --- ql/src/test/results/clientpositive/spark/sort_merge_join_desc_1.q.out +++ ql/src/test/results/clientpositive/spark/sort_merge_join_desc_1.q.out @@ -65,7 +65,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/sort_merge_join_desc_2.q.out ql/src/test/results/clientpositive/spark/sort_merge_join_desc_2.q.out index ae08516..60830e1 100644 --- ql/src/test/results/clientpositive/spark/sort_merge_join_desc_2.q.out +++ ql/src/test/results/clientpositive/spark/sort_merge_join_desc_2.q.out @@ -73,7 +73,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/sort_merge_join_desc_3.q.out 
ql/src/test/results/clientpositive/spark/sort_merge_join_desc_3.q.out index 6add9f9..919511c 100644 --- ql/src/test/results/clientpositive/spark/sort_merge_join_desc_3.q.out +++ ql/src/test/results/clientpositive/spark/sort_merge_join_desc_3.q.out @@ -73,7 +73,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/sort_merge_join_desc_4.q.out ql/src/test/results/clientpositive/spark/sort_merge_join_desc_4.q.out index b810a56..5db831d 100644 --- ql/src/test/results/clientpositive/spark/sort_merge_join_desc_4.q.out +++ ql/src/test/results/clientpositive/spark/sort_merge_join_desc_4.q.out @@ -71,7 +71,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/sort_merge_join_desc_5.q.out ql/src/test/results/clientpositive/spark/sort_merge_join_desc_5.q.out index f59d942..00d10f5 100644 --- ql/src/test/results/clientpositive/spark/sort_merge_join_desc_5.q.out +++ ql/src/test/results/clientpositive/spark/sort_merge_join_desc_5.q.out @@ -122,7 +122,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/sort_merge_join_desc_6.q.out ql/src/test/results/clientpositive/spark/sort_merge_join_desc_6.q.out index 4085d9a..05987fa 100644 --- ql/src/test/results/clientpositive/spark/sort_merge_join_desc_6.q.out +++ ql/src/test/results/clientpositive/spark/sort_merge_join_desc_6.q.out @@ -122,7 +122,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/sort_merge_join_desc_7.q.out ql/src/test/results/clientpositive/spark/sort_merge_join_desc_7.q.out index 28336c5..7b45735 100644 --- ql/src/test/results/clientpositive/spark/sort_merge_join_desc_7.q.out +++ ql/src/test/results/clientpositive/spark/sort_merge_join_desc_7.q.out @@ -160,7 +160,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/sort_merge_join_desc_8.q.out ql/src/test/results/clientpositive/spark/sort_merge_join_desc_8.q.out index 087a89d..94140da 100644 --- ql/src/test/results/clientpositive/spark/sort_merge_join_desc_8.q.out +++ ql/src/test/results/clientpositive/spark/sort_merge_join_desc_8.q.out @@ -120,7 +120,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here 
#### Vertices: Map 1 @@ -228,7 +228,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/subquery_in.q.out ql/src/test/results/clientpositive/spark/subquery_in.q.out index 323c894..929da4e 100644 --- ql/src/test/results/clientpositive/spark/subquery_in.q.out +++ ql/src/test/results/clientpositive/spark/subquery_in.q.out @@ -361,8 +361,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) Reducer 4 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1), Reducer 3 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: diff --git ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out index 2bedd37..26b9aa2 100644 --- ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out +++ ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out @@ -70,7 +70,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 9 (GROUP PARTITION-LEVEL SORT, 1) Reducer 3 <- Map 7 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 3 (GROUP SORT, 1) + Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 1) Reducer 5 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 6 (GROUP PARTITION-LEVEL SORT, 1) Reducer 9 <- Map 8 (GROUP, 1) #### A masked pattern was here #### @@ -505,7 +505,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 9 (GROUP PARTITION-LEVEL SORT, 1) Reducer 3 <- Map 7 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 3 (GROUP SORT, 1) + Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 1) Reducer 5 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 6 (GROUP PARTITION-LEVEL SORT, 1) Reducer 9 <- Map 8 (GROUP, 1) #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/spark/temp_table.q.out ql/src/test/results/clientpositive/spark/temp_table.q.out index a126fc7..1b3658b 100644 --- ql/src/test/results/clientpositive/spark/temp_table.q.out +++ ql/src/test/results/clientpositive/spark/temp_table.q.out @@ -210,7 +210,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Union 2 (GROUP SORT, 1) + Reducer 3 <- Union 2 (GROUP PARTITION-LEVEL SORT, 1) Union 2 <- Map 1 (NONE, 0), Map 4 (NONE, 0) #### A masked pattern was here #### Vertices: diff --git ql/src/test/results/clientpositive/spark/tez_join_tests.q.out ql/src/test/results/clientpositive/spark/tez_join_tests.q.out index 9254944..81594de 100644 --- ql/src/test/results/clientpositive/spark/tez_join_tests.q.out +++ ql/src/test/results/clientpositive/spark/tez_join_tests.q.out @@ -13,9 +13,9 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 7 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) Reducer 4 <- Map 6 (GROUP PARTITION-LEVEL SORT, 1), Reducer 3 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 5 <- Reducer 4 (GROUP SORT, 1) + Reducer 5 <- Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 
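The two recurring changes in the golden files above are (1) queries that previously planned a reduce-side Join Operator now plan a map-side Sorted Merge Bucket Map Join Operator, collapsing several Map vertices and a Reducer into a single Map, and (2) Spark shuffle edges previously printed as GROUP or GROUP SORT now print as GROUP PARTITION-LEVEL SORT. The two plain-Java sketches below are illustrative only, not Hive code; every class and method name in them is invented for this note.

First, a minimal sketch of the merge step an SMB join performs over inputs that are already sorted on the join key, which is why the join itself needs no shuffle: advance whichever side holds the smaller key, and on a match emit the cross product of the two equal-key runs.

import java.util.ArrayList;
import java.util.List;

public class SortedMergeJoinSketch {
  // Joins two inputs that are already sorted on the join key, the way an
  // SMB join consumes pre-sorted bucket files.
  static List<String> mergeJoin(int[] left, int[] right) {
    List<String> out = new ArrayList<>();
    int i = 0, j = 0;
    while (i < left.length && j < right.length) {
      if (left[i] < right[j]) {
        i++;
      } else if (left[i] > right[j]) {
        j++;
      } else {
        // Matching key: emit the cross product of the two equal-key runs.
        int key = left[i], iEnd = i, jEnd = j;
        while (iEnd < left.length && left[iEnd] == key) iEnd++;
        while (jEnd < right.length && right[jEnd] == key) jEnd++;
        for (int a = i; a < iEnd; a++) {
          for (int b = j; b < jEnd; b++) {
            out.add("key=" + key);
          }
        }
        i = iEnd;
        j = jEnd;
      }
    }
    return out;
  }

  public static void main(String[] args) {
    // Key 3 matches 2x1 rows and key 5 matches 1x2 rows:
    System.out.println(mergeJoin(new int[]{1, 3, 3, 5}, new int[]{3, 5, 5, 8}));
    // prints [key=3, key=3, key=5, key=5]
  }
}

Second, a rough sketch of the contract that a partition-level-sorted shuffle edge appears to advertise to the downstream reducer: rows are hash-partitioned by key and each partition is sorted by key, on top of the co-location of equal keys that a group-only shuffle already provides.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;

public class PartitionLevelSortSketch {
  // Hash-partitions (key, value) rows and sorts each partition by key.
  // A group-only shuffle would stop after the partitioning step.
  static List<List<int[]>> shuffle(List<int[]> rows, int numPartitions) {
    List<List<int[]>> parts = new ArrayList<>();
    for (int p = 0; p < numPartitions; p++) {
      parts.add(new ArrayList<>());
    }
    for (int[] row : rows) {
      parts.get(Math.floorMod(Integer.hashCode(row[0]), numPartitions)).add(row);
    }
    for (List<int[]> part : parts) {
      part.sort(Comparator.comparingInt(r -> r[0]));
    }
    return parts;
  }

  public static void main(String[] args) {
    List<int[]> rows = Arrays.asList(
        new int[]{5, 50}, new int[]{2, 20}, new int[]{5, 51}, new int[]{1, 10});
    for (List<int[]> part : shuffle(rows, 2)) {
      for (int[] r : part) {
        System.out.println(r[0] + " -> " + r[1]);
      }
    }
  }
}

Because the merge join consumes its inputs in key order, it can only be planned when the tables' bucketing and sort metadata guarantees that order, which the bucketed, sorted tables in these tests provide.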
diff --git ql/src/test/results/clientpositive/spark/tez_joins_explain.q.out ql/src/test/results/clientpositive/spark/tez_joins_explain.q.out index d2b23ad..ebc2af3 100644 --- ql/src/test/results/clientpositive/spark/tez_joins_explain.q.out +++ ql/src/test/results/clientpositive/spark/tez_joins_explain.q.out @@ -13,9 +13,9 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 7 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) Reducer 4 <- Map 6 (GROUP PARTITION-LEVEL SORT, 1), Reducer 3 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 5 <- Reducer 4 (GROUP SORT, 1) + Reducer 5 <- Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/transform_ppr1.q.out ql/src/test/results/clientpositive/spark/transform_ppr1.q.out index 5309ade..713bff8 100644 --- ql/src/test/results/clientpositive/spark/transform_ppr1.q.out +++ ql/src/test/results/clientpositive/spark/transform_ppr1.q.out @@ -99,7 +99,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/transform_ppr2.q.out ql/src/test/results/clientpositive/spark/transform_ppr2.q.out index 2dc285f..722b8fa 100644 --- ql/src/test/results/clientpositive/spark/transform_ppr2.q.out +++ ql/src/test/results/clientpositive/spark/transform_ppr2.q.out @@ -101,7 +101,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/union10.q.out ql/src/test/results/clientpositive/spark/union10.q.out index 59ebb0c..8e9a552 100644 --- ql/src/test/results/clientpositive/spark/union10.q.out +++ ql/src/test/results/clientpositive/spark/union10.q.out @@ -36,9 +36,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 5 <- Map 4 (GROUP, 1) - Reducer 7 <- Map 6 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 5 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 7 <- Map 6 (GROUP PARTITION-LEVEL SORT, 1) Union 3 <- Reducer 2 (NONE, 0), Reducer 5 (NONE, 0), Reducer 7 (NONE, 0) #### A masked pattern was here #### Vertices: diff --git ql/src/test/results/clientpositive/spark/union11.q.out ql/src/test/results/clientpositive/spark/union11.q.out index 40e29b8..0192497 100644 --- ql/src/test/results/clientpositive/spark/union11.q.out +++ ql/src/test/results/clientpositive/spark/union11.q.out @@ -26,8 +26,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 4 <- Union 3 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Union 3 (GROUP PARTITION-LEVEL SORT, 1) Reducer 6 <- Map 5 (GROUP, 1) Reducer 8 <- Map 7 (GROUP, 1) Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0), Reducer 8 (NONE, 0) @@ -191,6 +191,6 @@ POSTHOOK: query: select unionsrc.key, count(1) FROM (select 'tst1' as key, count POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -tst3 1 tst1 1 tst2 1 +tst3 1 diff --git ql/src/test/results/clientpositive/spark/union14.q.out ql/src/test/results/clientpositive/spark/union14.q.out index 5e8fdd8..11fdcce 100644 --- ql/src/test/results/clientpositive/spark/union14.q.out +++ 
ql/src/test/results/clientpositive/spark/union14.q.out @@ -24,7 +24,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Union 2 (GROUP, 1) + Reducer 3 <- Union 2 (GROUP PARTITION-LEVEL SORT, 1) Reducer 5 <- Map 4 (GROUP, 1) Union 2 <- Map 1 (NONE, 0), Reducer 5 (NONE, 0) #### A masked pattern was here #### @@ -131,20 +131,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 #### A masked pattern was here #### -278 1 -273 1 + 10 128 1 -255 1 -tst1 1 -238 1 146 1 150 1 - 10 -406 1 -369 1 213 1 -311 1 224 1 -66 1 +238 1 +255 1 +273 1 +278 1 +311 1 +369 1 401 1 +406 1 +66 1 98 1 +tst1 1 diff --git ql/src/test/results/clientpositive/spark/union15.q.out ql/src/test/results/clientpositive/spark/union15.q.out index 1c35ead..a374dd9 100644 --- ql/src/test/results/clientpositive/spark/union15.q.out +++ ql/src/test/results/clientpositive/spark/union15.q.out @@ -26,8 +26,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 4 <- Union 3 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Union 3 (GROUP PARTITION-LEVEL SORT, 1) Union 3 <- Map 5 (NONE, 0), Map 6 (NONE, 0), Reducer 2 (NONE, 0) #### A masked pattern was here #### Vertices: @@ -155,20 +155,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 #### A masked pattern was here #### -278 2 -273 2 + 20 128 2 -255 2 -tst1 1 -238 2 146 2 150 2 - 20 -406 2 -369 2 213 2 -311 2 224 2 -66 2 +238 2 +255 2 +273 2 +278 2 +311 2 +369 2 401 2 +406 2 +66 2 98 2 +tst1 1 diff --git ql/src/test/results/clientpositive/spark/union16.q.out ql/src/test/results/clientpositive/spark/union16.q.out index c35ed10..dc7ffe2 100644 --- ql/src/test/results/clientpositive/spark/union16.q.out +++ ql/src/test/results/clientpositive/spark/union16.q.out @@ -72,7 +72,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Union 2 (GROUP, 1) + Reducer 3 <- Union 2 (GROUP PARTITION-LEVEL SORT, 1) Union 2 <- Map 1 (NONE, 0), Map 10 (NONE, 0), Map 11 (NONE, 0), Map 12 (NONE, 0), Map 13 (NONE, 0), Map 14 (NONE, 0), Map 15 (NONE, 0), Map 16 (NONE, 0), Map 17 (NONE, 0), Map 18 (NONE, 0), Map 19 (NONE, 0), Map 20 (NONE, 0), Map 21 (NONE, 0), Map 22 (NONE, 0), Map 23 (NONE, 0), Map 24 (NONE, 0), Map 25 (NONE, 0), Map 26 (NONE, 0), Map 27 (NONE, 0), Map 4 (NONE, 0), Map 5 (NONE, 0), Map 6 (NONE, 0), Map 7 (NONE, 0), Map 8 (NONE, 0), Map 9 (NONE, 0) #### A masked pattern was here #### Vertices: diff --git ql/src/test/results/clientpositive/spark/union18.q.out ql/src/test/results/clientpositive/spark/union18.q.out index f1c69bf..a9d22d7 100644 --- ql/src/test/results/clientpositive/spark/union18.q.out +++ ql/src/test/results/clientpositive/spark/union18.q.out @@ -44,7 +44,7 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) Union 3 <- Map 4 (NONE, 0), Reducer 2 (NONE, 0) #### A masked pattern was here #### Vertices: diff --git ql/src/test/results/clientpositive/spark/union19.q.out ql/src/test/results/clientpositive/spark/union19.q.out index c86afb0..1bd5dd1 100644 --- ql/src/test/results/clientpositive/spark/union19.q.out +++ ql/src/test/results/clientpositive/spark/union19.q.out @@ -44,8 +44,8 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 4 <- Union 3 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Union 3 (GROUP PARTITION-LEVEL SORT, 1) Union 3 <- Map 5 (NONE, 0), Reducer 2 (NONE, 0) #### A masked 
pattern was here #### Vertices: diff --git ql/src/test/results/clientpositive/spark/union2.q.out ql/src/test/results/clientpositive/spark/union2.q.out index da8d154..92888d6 100644 --- ql/src/test/results/clientpositive/spark/union2.q.out +++ ql/src/test/results/clientpositive/spark/union2.q.out @@ -20,7 +20,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Union 2 (GROUP, 1) + Reducer 3 <- Union 2 (GROUP PARTITION-LEVEL SORT, 1) Union 2 <- Map 1 (NONE, 0), Map 4 (NONE, 0) #### A masked pattern was here #### Vertices: diff --git ql/src/test/results/clientpositive/spark/union23.q.out ql/src/test/results/clientpositive/spark/union23.q.out index 9e26762..37f302a 100644 --- ql/src/test/results/clientpositive/spark/union23.q.out +++ ql/src/test/results/clientpositive/spark/union23.q.out @@ -24,7 +24,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Union 2 (GROUP SORT, 1) + Reducer 3 <- Union 2 (GROUP PARTITION-LEVEL SORT, 1) Union 2 <- Map 1 (NONE, 0), Map 4 (NONE, 0) #### A masked pattern was here #### Vertices: diff --git ql/src/test/results/clientpositive/spark/union25.q.out ql/src/test/results/clientpositive/spark/union25.q.out index 07ba875..0cea652 100644 --- ql/src/test/results/clientpositive/spark/union25.q.out +++ ql/src/test/results/clientpositive/spark/union25.q.out @@ -68,8 +68,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Union 2 (GROUP, 1) - Reducer 5 <- Union 4 (GROUP, 1) + Reducer 3 <- Union 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 5 <- Union 4 (GROUP PARTITION-LEVEL SORT, 1) Union 2 <- Map 1 (NONE, 0), Map 6 (NONE, 0) Union 4 <- Map 7 (NONE, 0), Reducer 3 (NONE, 0) #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/spark/union28.q.out ql/src/test/results/clientpositive/spark/union28.q.out index f668ff8..5cb7e7b 100644 --- ql/src/test/results/clientpositive/spark/union28.q.out +++ ql/src/test/results/clientpositive/spark/union28.q.out @@ -42,8 +42,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 4 <- Map 3 (GROUP, 1) - Reducer 6 <- Map 5 (GROUP, 1) + Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 6 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1) Union 2 <- Map 1 (NONE, 0), Reducer 4 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: diff --git ql/src/test/results/clientpositive/spark/union3.q.out ql/src/test/results/clientpositive/spark/union3.q.out index ba21367..30fc734 100644 --- ql/src/test/results/clientpositive/spark/union3.q.out +++ ql/src/test/results/clientpositive/spark/union3.q.out @@ -46,12 +46,12 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Map 9 (GROUP, 1) - Reducer 11 <- Reducer 10 (GROUP SORT, 1) - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 5 <- Map 4 (GROUP, 1) - Reducer 7 <- Map 6 (GROUP, 1) - Reducer 8 <- Reducer 7 (GROUP SORT, 1) + Reducer 10 <- Map 9 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 11 <- Reducer 10 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 5 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 7 <- Map 6 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 8 <- Reducer 7 (GROUP PARTITION-LEVEL SORT, 1) Union 3 <- Reducer 11 (NONE, 0), Reducer 2 (NONE, 0), Reducer 5 (NONE, 0), Reducer 8 (NONE, 0) #### A masked pattern was here #### Vertices: diff --git ql/src/test/results/clientpositive/spark/union30.q.out ql/src/test/results/clientpositive/spark/union30.q.out index ee0daf4..3027b10c 100644 --- ql/src/test/results/clientpositive/spark/union30.q.out +++ 
ql/src/test/results/clientpositive/spark/union30.q.out @@ -56,8 +56,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 5 <- Map 4 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 5 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1) Union 3 <- Map 6 (NONE, 0), Map 7 (NONE, 0), Reducer 2 (NONE, 0), Reducer 5 (NONE, 0) #### A masked pattern was here #### Vertices: diff --git ql/src/test/results/clientpositive/spark/union33.q.out ql/src/test/results/clientpositive/spark/union33.q.out index ca08e0c..767cb1f 100644 --- ql/src/test/results/clientpositive/spark/union33.q.out +++ ql/src/test/results/clientpositive/spark/union33.q.out @@ -42,8 +42,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 4 <- Map 3 (GROUP SORT, 1) - Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 5 <- Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) Union 2 <- Map 1 (NONE, 0), Reducer 5 (NONE, 0) #### A masked pattern was here #### Vertices: @@ -202,8 +202,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) Union 4 <- Map 5 (NONE, 0), Reducer 3 (NONE, 0) #### A masked pattern was here #### Vertices: diff --git ql/src/test/results/clientpositive/spark/union4.q.out ql/src/test/results/clientpositive/spark/union4.q.out index 2e46204..6971577 100644 --- ql/src/test/results/clientpositive/spark/union4.q.out +++ ql/src/test/results/clientpositive/spark/union4.q.out @@ -34,8 +34,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 5 <- Map 4 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 5 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1) Union 3 <- Reducer 2 (NONE, 0), Reducer 5 (NONE, 0) #### A masked pattern was here #### Vertices: diff --git ql/src/test/results/clientpositive/spark/union5.q.out ql/src/test/results/clientpositive/spark/union5.q.out index 5512b50..ade79d3 100644 --- ql/src/test/results/clientpositive/spark/union5.q.out +++ ql/src/test/results/clientpositive/spark/union5.q.out @@ -22,8 +22,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 4 <- Union 3 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Union 3 (GROUP PARTITION-LEVEL SORT, 1) Reducer 6 <- Map 5 (GROUP, 1) Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/spark/union6.q.out ql/src/test/results/clientpositive/spark/union6.q.out index 01f044e..3b69dde 100644 --- ql/src/test/results/clientpositive/spark/union6.q.out +++ ql/src/test/results/clientpositive/spark/union6.q.out @@ -34,7 +34,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) Union 3 <- Map 4 (NONE, 0), Reducer 2 (NONE, 0) #### A masked pattern was here #### Vertices: diff --git ql/src/test/results/clientpositive/spark/union7.q.out ql/src/test/results/clientpositive/spark/union7.q.out index b5a693e..74b28aa 100644 --- ql/src/test/results/clientpositive/spark/union7.q.out +++ ql/src/test/results/clientpositive/spark/union7.q.out @@ -22,8 +22,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 4 <- Union 3 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Union 3 (GROUP 
PARTITION-LEVEL SORT, 1) Union 3 <- Map 5 (NONE, 0), Reducer 2 (NONE, 0) #### A masked pattern was here #### Vertices: @@ -127,20 +127,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 #### A masked pattern was here #### -278 1 -273 1 + 10 128 1 -255 1 -tst1 1 -238 1 146 1 150 1 - 10 -406 1 -369 1 213 1 -311 1 224 1 -66 1 +238 1 +255 1 +273 1 +278 1 +311 1 +369 1 401 1 +406 1 +66 1 98 1 +tst1 1 diff --git ql/src/test/results/clientpositive/spark/union9.q.out ql/src/test/results/clientpositive/spark/union9.q.out index db14477..7070cd7 100644 --- ql/src/test/results/clientpositive/spark/union9.q.out +++ ql/src/test/results/clientpositive/spark/union9.q.out @@ -22,7 +22,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Union 2 (GROUP, 1) + Reducer 3 <- Union 2 (GROUP PARTITION-LEVEL SORT, 1) Union 2 <- Map 1 (NONE, 0), Map 4 (NONE, 0), Map 5 (NONE, 0) #### A masked pattern was here #### Vertices: diff --git ql/src/test/results/clientpositive/spark/union_ppr.q.out ql/src/test/results/clientpositive/spark/union_ppr.q.out index 9ed0c86..cac020d 100644 --- ql/src/test/results/clientpositive/spark/union_ppr.q.out +++ ql/src/test/results/clientpositive/spark/union_ppr.q.out @@ -112,7 +112,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Union 2 (GROUP SORT, 1) + Reducer 3 <- Union 2 (GROUP PARTITION-LEVEL SORT, 1) Union 2 <- Map 1 (NONE, 0), Map 4 (NONE, 0) #### A masked pattern was here #### Vertices: diff --git ql/src/test/results/clientpositive/spark/union_remove_1.q.out ql/src/test/results/clientpositive/spark/union_remove_1.q.out index 85ec617..8b58667 100644 --- ql/src/test/results/clientpositive/spark/union_remove_1.q.out +++ ql/src/test/results/clientpositive/spark/union_remove_1.q.out @@ -69,8 +69,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 5 <- Map 4 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 5 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1) Union 3 <- Reducer 2 (NONE, 0), Reducer 5 (NONE, 0) #### A masked pattern was here #### Vertices: diff --git ql/src/test/results/clientpositive/spark/union_remove_10.q.out ql/src/test/results/clientpositive/spark/union_remove_10.q.out index f4601fd..4c0a08a 100644 --- ql/src/test/results/clientpositive/spark/union_remove_10.q.out +++ ql/src/test/results/clientpositive/spark/union_remove_10.q.out @@ -90,7 +90,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) Union 3 <- Map 4 (NONE, 0), Map 5 (NONE, 0), Reducer 2 (NONE, 0) #### A masked pattern was here #### Vertices: diff --git ql/src/test/results/clientpositive/spark/union_remove_15.q.out ql/src/test/results/clientpositive/spark/union_remove_15.q.out index fe2dcd1..5dfe863 100644 --- ql/src/test/results/clientpositive/spark/union_remove_15.q.out +++ ql/src/test/results/clientpositive/spark/union_remove_15.q.out @@ -75,8 +75,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 5 <- Map 4 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 5 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1) Union 3 <- Reducer 2 (NONE, 0), Reducer 5 (NONE, 0) #### A masked pattern was here #### Vertices: diff --git ql/src/test/results/clientpositive/spark/union_remove_16.q.out ql/src/test/results/clientpositive/spark/union_remove_16.q.out index 6902aeb..8aaf2f9 100644 --- ql/src/test/results/clientpositive/spark/union_remove_16.q.out +++ 
ql/src/test/results/clientpositive/spark/union_remove_16.q.out @@ -78,8 +78,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 5 <- Map 4 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 5 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1) Union 3 <- Reducer 2 (NONE, 0), Reducer 5 (NONE, 0) #### A masked pattern was here #### Vertices: diff --git ql/src/test/results/clientpositive/spark/union_remove_18.q.out ql/src/test/results/clientpositive/spark/union_remove_18.q.out index b901363..82e2f67 100644 --- ql/src/test/results/clientpositive/spark/union_remove_18.q.out +++ ql/src/test/results/clientpositive/spark/union_remove_18.q.out @@ -73,8 +73,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 5 <- Map 4 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 5 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1) Union 3 <- Reducer 2 (NONE, 0), Reducer 5 (NONE, 0) #### A masked pattern was here #### Vertices: diff --git ql/src/test/results/clientpositive/spark/union_remove_19.q.out ql/src/test/results/clientpositive/spark/union_remove_19.q.out index ea6ff41..c7d2ba9 100644 --- ql/src/test/results/clientpositive/spark/union_remove_19.q.out +++ ql/src/test/results/clientpositive/spark/union_remove_19.q.out @@ -73,8 +73,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 5 <- Map 4 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 5 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1) Union 3 <- Reducer 2 (NONE, 0), Reducer 5 (NONE, 0) #### A masked pattern was here #### Vertices: @@ -281,8 +281,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 5 <- Map 4 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 5 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1) Union 3 <- Reducer 2 (NONE, 0), Reducer 5 (NONE, 0) #### A masked pattern was here #### Vertices: @@ -457,8 +457,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 5 <- Map 4 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 5 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1) Union 3 <- Reducer 2 (NONE, 0), Reducer 5 (NONE, 0) #### A masked pattern was here #### Vertices: diff --git ql/src/test/results/clientpositive/spark/union_remove_2.q.out ql/src/test/results/clientpositive/spark/union_remove_2.q.out index 8e58c46..ca3c558 100644 --- ql/src/test/results/clientpositive/spark/union_remove_2.q.out +++ ql/src/test/results/clientpositive/spark/union_remove_2.q.out @@ -75,7 +75,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 4 <- Map 3 (GROUP, 1) + Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 1) Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0), Reducer 4 (NONE, 0) #### A masked pattern was here #### Vertices: diff --git ql/src/test/results/clientpositive/spark/union_remove_20.q.out ql/src/test/results/clientpositive/spark/union_remove_20.q.out index 13085a4..74b5bda 100644 --- ql/src/test/results/clientpositive/spark/union_remove_20.q.out +++ ql/src/test/results/clientpositive/spark/union_remove_20.q.out @@ -71,8 +71,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 5 <- Map 4 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 5 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1) Union 3 <- Reducer 2 (NONE, 0), Reducer 5 (NONE, 0) #### A masked pattern was here #### Vertices: diff --git 
ql/src/test/results/clientpositive/spark/union_remove_21.q.out ql/src/test/results/clientpositive/spark/union_remove_21.q.out index 2cc70c5..d4df62c 100644 --- ql/src/test/results/clientpositive/spark/union_remove_21.q.out +++ ql/src/test/results/clientpositive/spark/union_remove_21.q.out @@ -71,8 +71,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 5 <- Map 4 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 5 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1) Union 3 <- Reducer 2 (NONE, 0), Reducer 5 (NONE, 0) #### A masked pattern was here #### Vertices: diff --git ql/src/test/results/clientpositive/spark/union_remove_24.q.out ql/src/test/results/clientpositive/spark/union_remove_24.q.out index 5913610..c9d13ad 100644 --- ql/src/test/results/clientpositive/spark/union_remove_24.q.out +++ ql/src/test/results/clientpositive/spark/union_remove_24.q.out @@ -67,8 +67,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 5 <- Map 4 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 5 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1) Union 3 <- Reducer 2 (NONE, 0), Reducer 5 (NONE, 0) #### A masked pattern was here #### Vertices: diff --git ql/src/test/results/clientpositive/spark/union_remove_25.q.out ql/src/test/results/clientpositive/spark/union_remove_25.q.out index 89b8bac..48da315 100644 --- ql/src/test/results/clientpositive/spark/union_remove_25.q.out +++ ql/src/test/results/clientpositive/spark/union_remove_25.q.out @@ -85,8 +85,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 5 <- Map 4 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 5 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1) Union 3 <- Reducer 2 (NONE, 0), Reducer 5 (NONE, 0) #### A masked pattern was here #### Vertices: @@ -300,8 +300,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 5 <- Map 4 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 5 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1) Union 3 <- Reducer 2 (NONE, 0), Reducer 5 (NONE, 0) #### A masked pattern was here #### Vertices: @@ -450,7 +450,7 @@ Partition Parameters: numFiles 2 numRows -1 rawDataSize -1 - totalSize 6814 + totalSize 6812 #### A masked pattern was here #### # Storage Information @@ -488,8 +488,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 5 <- Map 4 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 5 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1) Union 3 <- Reducer 2 (NONE, 0), Reducer 5 (NONE, 0) #### A masked pattern was here #### Vertices: diff --git ql/src/test/results/clientpositive/spark/union_remove_4.q.out ql/src/test/results/clientpositive/spark/union_remove_4.q.out index 909a924..89a00f4 100644 --- ql/src/test/results/clientpositive/spark/union_remove_4.q.out +++ ql/src/test/results/clientpositive/spark/union_remove_4.q.out @@ -74,8 +74,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 5 <- Map 4 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 5 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1) Union 3 <- Reducer 2 (NONE, 0), Reducer 5 (NONE, 0) #### A masked pattern was here #### Vertices: diff --git ql/src/test/results/clientpositive/spark/union_remove_5.q.out ql/src/test/results/clientpositive/spark/union_remove_5.q.out index fb24181..768c929 100644 --- 
ql/src/test/results/clientpositive/spark/union_remove_5.q.out +++ ql/src/test/results/clientpositive/spark/union_remove_5.q.out @@ -82,7 +82,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 4 <- Map 3 (GROUP, 1) + Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 1) Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0), Reducer 4 (NONE, 0) #### A masked pattern was here #### Vertices: diff --git ql/src/test/results/clientpositive/spark/union_remove_6.q.out ql/src/test/results/clientpositive/spark/union_remove_6.q.out index 73f038b..dae3390 100644 --- ql/src/test/results/clientpositive/spark/union_remove_6.q.out +++ ql/src/test/results/clientpositive/spark/union_remove_6.q.out @@ -72,8 +72,8 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 5 <- Map 4 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 5 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1) Union 3 <- Reducer 2 (NONE, 0), Reducer 5 (NONE, 0) #### A masked pattern was here #### Vertices: diff --git ql/src/test/results/clientpositive/spark/union_remove_7.q.out ql/src/test/results/clientpositive/spark/union_remove_7.q.out index 49ec685..0192752 100644 --- ql/src/test/results/clientpositive/spark/union_remove_7.q.out +++ ql/src/test/results/clientpositive/spark/union_remove_7.q.out @@ -73,8 +73,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 5 <- Map 4 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 5 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1) Union 3 <- Reducer 2 (NONE, 0), Reducer 5 (NONE, 0) #### A masked pattern was here #### Vertices: diff --git ql/src/test/results/clientpositive/spark/union_remove_8.q.out ql/src/test/results/clientpositive/spark/union_remove_8.q.out index e98957d..b665ad7 100644 --- ql/src/test/results/clientpositive/spark/union_remove_8.q.out +++ ql/src/test/results/clientpositive/spark/union_remove_8.q.out @@ -79,7 +79,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 4 <- Map 3 (GROUP, 1) + Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 1) Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0), Reducer 4 (NONE, 0) #### A masked pattern was here #### Vertices: diff --git ql/src/test/results/clientpositive/spark/union_remove_9.q.out ql/src/test/results/clientpositive/spark/union_remove_9.q.out index 66fe4e9..e1e8841 100644 --- ql/src/test/results/clientpositive/spark/union_remove_9.q.out +++ ql/src/test/results/clientpositive/spark/union_remove_9.q.out @@ -86,7 +86,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 4 <- Map 3 (GROUP, 1) + Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 1) Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0), Reducer 4 (NONE, 0) #### A masked pattern was here #### Vertices: diff --git ql/src/test/results/clientpositive/spark/vector_between_in.q.out ql/src/test/results/clientpositive/spark/vector_between_in.q.out index f0d2ac7..99990e2 100644 --- ql/src/test/results/clientpositive/spark/vector_between_in.q.out +++ ql/src/test/results/clientpositive/spark/vector_between_in.q.out @@ -21,7 +21,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -74,7 +74,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -135,7 +135,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- 
Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -188,7 +188,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -249,7 +249,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -302,7 +302,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -355,7 +355,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -408,7 +408,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out index 2dd7aab..b731ef8 100644 --- ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out +++ ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out @@ -120,8 +120,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out index 8b6a226..7424325 100644 --- ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out +++ ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out @@ -1248,7 +1248,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/vector_data_types.q.out ql/src/test/results/clientpositive/spark/vector_data_types.q.out index 5758c4b..5460fe9 100644 --- ql/src/test/results/clientpositive/spark/vector_data_types.q.out +++ ql/src/test/results/clientpositive/spark/vector_data_types.q.out @@ -110,7 +110,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -190,7 +190,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out index 3c6d561..e97a3ed 100644 --- ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out +++ ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out @@ -41,7 +41,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -114,14 +114,14 @@ POSTHOOK: query: SELECT cint, POSTHOOK: 
type: QUERY POSTHOOK: Input: default@decimal_vgby #### A masked pattern was here #### -253665376 1024 9767.0054054054 -9779.5486486487 -347484.0818378374 1024 11697.96923076923100 -11712.99230769231000 -416182.64030769233089 -762 2 5831542.2692483780 1531.2194054054 5833073.4886537834 2 6984454.21109769200000 1833.9456923076925 6986288.15678999969250 -528534767 1024 5831542.2692483780 -9777.1594594595 11646372.8607481068 1024 6984454.21109769200000 -11710.13076923077100 13948892.79980307629003 -6981 3 5831542.269248378 -515.6210729730 5830511.0271024320 3 6984454.211097692 -617.56077692307690 6983219.08954384584620 -626923679 1024 9723.4027027027 -9778.9513513514 10541.0525297287 1024 11645.74615384615400 -11712.27692307692300 12625.04759999997746 NULL 3072 9318.4351351351 -4298.1513513514 5018444.1081079808 3072 11160.71538461538500 -5147.90769230769300 6010604.30769230735360 -3728 6 5831542.2692483780 -3367.6517567568 5817556.0411483778 6 6984454.21109769200000 -4033.445769230769 6967702.86724384584710 -563 2 -515.6210729730 -3367.6517567568 -3883.2728297298 2 -617.56077692307690 -4033.445769230769 -4651.00654615384590 +762 2 5831542.2692483780 1531.2194054054 5833073.4886537834 2 6984454.21109769200000 1833.9456923076925 6986288.15678999969250 +6981 3 5831542.269248378 -515.6210729730 5830511.0271024320 3 6984454.211097692 -617.56077692307690 6983219.08954384584620 +253665376 1024 9767.0054054054 -9779.5486486487 -347484.0818378374 1024 11697.96923076923100 -11712.99230769231000 -416182.64030769233089 +528534767 1024 5831542.2692483780 -9777.1594594595 11646372.8607481068 1024 6984454.21109769200000 -11710.13076923077100 13948892.79980307629003 +626923679 1024 9723.4027027027 -9778.9513513514 10541.0525297287 1024 11645.74615384615400 -11712.27692307692300 12625.04759999997746 PREHOOK: query: -- Now add the others... 
EXPLAIN SELECT cint, COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), AVG(cdecimal1), STDDEV_POP(cdecimal1), STDDEV_SAMP(cdecimal1), @@ -146,7 +146,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -218,11 +218,11 @@ POSTHOOK: query: SELECT cint, POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_vgby #### A masked pattern was here #### -253665376 1024 9767.0054054054 -9779.5486486487 -347484.0818378374 -339.33992366976309 5708.9563478862 5711.745967572779 1024 11697.96923076923100 -11712.99230769231000 -416182.64030769233089 -406.428359675480791885 6837.632716002934 6840.973851172274 -762 2 5831542.2692483780 1531.2194054054 5833073.4886537834 2916536.7443268917 2915005.5249214866 4122440.3477364695 2 6984454.21109769200000 1833.9456923076925 6986288.15678999969250 3493144.07839499984625 3491310.1327026924 4937458.140118758 -528534767 1024 5831542.2692483780 -9777.1594594595 11646372.8607481068 11373.41099682432305 257528.92988206653 257654.7686043977 1024 6984454.21109769200000 -11710.13076923077100 13948892.79980307629003 13621.965624807691689482 308443.1074570801 308593.82484083984 -6981 3 5831542.269248378 -515.6210729730 5830511.0271024320 1943503.67570081066667 2749258.455012492 3367140.1929065133 3 6984454.211097692 -617.56077692307690 6983219.08954384584620 2327739.696514615282066667 3292794.4113115156 4032833.0678006653 -626923679 1024 9723.4027027027 -9778.9513513514 10541.0525297287 10.29399661106318 5742.09145323734 5744.897264034267 1024 11645.74615384615400 -11712.27692307692300 12625.04759999997746 12.329148046874977988 6877.318722794877 6880.679250101603 NULL 3072 9318.4351351351 -4298.1513513514 5018444.1081079808 1633.60810810806667 5695.483082135364 5696.4103077145055 3072 11160.71538461538500 -5147.90769230769300 6010604.30769230735360 1956.576923076922966667 6821.495748565159 6822.606289190924 -3728 6 5831542.2692483780 -3367.6517567568 5817556.0411483778 969592.67352472963333 2174330.2092403853 2381859.406131774 6 6984454.21109769200000 -4033.445769230769 6967702.86724384584710 1161283.811207307641183333 2604201.2704476737 2852759.5602156054 -563 2 -515.6210729730 -3367.6517567568 -3883.2728297298 -1941.6364148649 1426.0153418918999 2016.6902366556308 2 -617.56077692307690 -4033.445769230769 -4651.00654615384590 -2325.50327307692295 1707.9424961538462 2415.395441814127 +762 2 5831542.2692483780 1531.2194054054 5833073.4886537834 2916536.7443268917 2915005.5249214866 4122440.3477364695 2 6984454.21109769200000 1833.9456923076925 6986288.15678999969250 3493144.07839499984625 3491310.1327026924 4937458.140118758 +6981 3 5831542.269248378 -515.6210729730 5830511.0271024320 1943503.67570081066667 2749258.455012492 3367140.1929065133 3 6984454.211097692 -617.56077692307690 6983219.08954384584620 2327739.696514615282066667 3292794.4113115156 4032833.0678006653 +253665376 1024 9767.0054054054 -9779.5486486487 -347484.0818378374 -339.33992366976309 5708.9563478862 5711.745967572779 1024 11697.96923076923100 -11712.99230769231000 -416182.64030769233089 -406.428359675480791885 6837.632716002934 6840.973851172274 +528534767 1024 5831542.2692483780 -9777.1594594595 11646372.8607481068 11373.41099682432305 257528.92988206653 257654.7686043977 1024 6984454.21109769200000 -11710.13076923077100 13948892.79980307629003 13621.965624807691689482 308443.1074570801 308593.82484083984 +626923679 1024 9723.4027027027 -9778.9513513514 
10541.0525297287 10.29399661106318 5742.09145323734 5744.897264034267 1024 11645.74615384615400 -11712.27692307692300 12625.04759999997746 12.329148046874977988 6877.318722794877 6880.679250101603 diff --git ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out index 31e2cd7..1fbc35b 100644 --- ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out +++ ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out @@ -25,7 +25,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 5 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) Reducer 5 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 6 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: diff --git ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out index 3b89885..94c08ff 100644 --- ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out +++ ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out @@ -116,8 +116,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/vectorization_0.q.out ql/src/test/results/clientpositive/spark/vectorization_0.q.out index 82e4926..7da9234 100644 --- ql/src/test/results/clientpositive/spark/vectorization_0.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_0.q.out @@ -24,8 +24,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -122,8 +122,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -229,8 +229,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -342,8 +342,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -440,8 +440,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -547,8 +547,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### 
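Note on the recurring edge rename in these plan diffs: the parenthesized label on a Spark edge names the shuffle Hive asks Spark to perform between two vertices. GROUP only co-locates rows by key, GROUP SORT additionally implies an ordering across partitions, and GROUP PARTITION-LEVEL SORT shuffles by key while sorting rows only within each reduce partition, which is all that reduce-side operators consuming keys in sorted order (group-by, windowing/PTF) actually need. A few edges are accordingly left as GROUP SORT, e.g. "Reducer 5 <- Map 4 (GROUP SORT, 1)" in vectorized_ptf.q.out below. The following is a minimal sketch of the distinction, assuming the natural mapping of a PARTITION-LEVEL SORT edge onto Spark's repartitionAndSortWithinPartitions; the class and variable names are illustrative only, not part of this patch.

import java.util.Arrays;

import org.apache.spark.HashPartitioner;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;

import scala.Tuple2;

public class ShuffleEdgeSketch {
  public static void main(String[] args) {
    JavaSparkContext sc = new JavaSparkContext("local", "shuffle-edge-sketch");
    JavaPairRDD<String, Integer> rows = sc.parallelizePairs(Arrays.asList(
        new Tuple2<String, Integer>("c", 3),
        new Tuple2<String, Integer>("a", 1),
        new Tuple2<String, Integer>("b", 2)));

    // GROUP edge: rows sharing a key land in the same partition; nothing is sorted.
    JavaPairRDD<String, Iterable<Integer>> grouped = rows.groupByKey(2);

    // GROUP PARTITION-LEVEL SORT edge: the same key-based shuffle, but each
    // reduce partition iterates its rows in key order. There is still no
    // total order across partitions (that would correspond to a GROUP SORT
    // edge, i.e. sortByKey).
    JavaPairRDD<String, Integer> partitionSorted =
        rows.repartitionAndSortWithinPartitions(new HashPartitioner(2));

    System.out.println(grouped.collect());
    System.out.println(partitionSorted.collect());
    sc.stop();
  }
}

With two partitions, grouped makes no ordering promise at all, while partitionSorted streams each partition's rows in key order without guaranteeing which keys land in which partition.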
Vertices: Map 1 @@ -660,8 +660,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -758,8 +758,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -865,8 +865,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1017,7 +1017,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/vectorization_14.q.out ql/src/test/results/clientpositive/spark/vectorization_14.q.out index 2d49a0c..116366a 100644 --- ql/src/test/results/clientpositive/spark/vectorization_14.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_14.q.out @@ -72,8 +72,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/vectorization_15.q.out ql/src/test/results/clientpositive/spark/vectorization_15.q.out index f9f4476..a892172 100644 --- ql/src/test/results/clientpositive/spark/vectorization_15.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_15.q.out @@ -68,8 +68,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/vectorization_9.q.out ql/src/test/results/clientpositive/spark/vectorization_9.q.out index 80a93f4..4528bbb 100644 --- ql/src/test/results/clientpositive/spark/vectorization_9.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_9.q.out @@ -46,7 +46,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/vectorization_div0.q.out ql/src/test/results/clientpositive/spark/vectorization_div0.q.out index 6f19862..8060682 100644 --- ql/src/test/results/clientpositive/spark/vectorization_div0.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_div0.q.out @@ -156,7 +156,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -331,7 +331,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git 
ql/src/test/results/clientpositive/spark/vectorization_part_project.q.out ql/src/test/results/clientpositive/spark/vectorization_part_project.q.out index aa87dd9..87dfcc8 100644 --- ql/src/test/results/clientpositive/spark/vectorization_part_project.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_part_project.q.out @@ -60,7 +60,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/vectorization_pushdown.q.out ql/src/test/results/clientpositive/spark/vectorization_pushdown.q.out index a785497..9742492 100644 --- ql/src/test/results/clientpositive/spark/vectorization_pushdown.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_pushdown.q.out @@ -11,7 +11,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out index 5b9205b..6d9f1ef 100644 --- ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out @@ -15,7 +15,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out index 1c226fd..e9c9503 100644 --- ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out @@ -11,7 +11,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 5 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) Reducer 5 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 6 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: diff --git ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out index 1d11b30..f2a38ba 100644 --- ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out @@ -225,8 +225,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -507,8 +507,8 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) - Reducer 4 <- Reducer 3 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -820,7 +820,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ 
-1072,8 +1072,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1356,8 +1356,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1649,9 +1649,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 4 <- Reducer 3 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2529,8 +2529,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2802,8 +2802,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3079,8 +3079,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3363,9 +3363,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) - Reducer 4 <- Reducer 3 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3686,8 +3686,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -4068,7 +4068,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 5 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) Reducer 5 <- Map 4 (GROUP SORT, 1) #### A masked pattern was here #### Vertices: @@ -4386,8 +4386,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -4527,31 +4527,31 @@ order by p_name) POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### -Manufacturer#3 almond antique forest lavender goldenrod 14 -Manufacturer#4 almond aquamarine floral ivory bisque 27 -Manufacturer#3 almond antique olive coral navajo 45 -Manufacturer#2 almond 
aquamarine sandy cyan gainsboro 18 -Manufacturer#4 almond antique gainsboro frosted violet 10 -Manufacturer#2 almond aquamarine rose maroon antique 25 -Manufacturer#5 almond aquamarine dodger light gainsboro 46 -Manufacturer#1 almond antique salmon chartreuse burlywood 6 Manufacturer#1 almond antique burnished rose metallic 2 -Manufacturer#4 almond azure aquamarine papaya violet 12 +Manufacturer#1 almond antique chartreuse lavender yellow 34 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 Manufacturer#1 almond aquamarine burnished black steel 28 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 +Manufacturer#2 almond antique violet chocolate turquoise 14 Manufacturer#2 almond antique violet turquoise frosted 40 -Manufacturer#1 almond antique chartreuse lavender yellow 34 -Manufacturer#4 almond antique violet mint lemon 39 Manufacturer#2 almond aquamarine midnight light salmon 2 -Manufacturer#5 almond azure blanched chiffon midnight 23 -Manufacturer#2 almond antique violet chocolate turquoise 14 +Manufacturer#2 almond aquamarine rose maroon antique 25 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 +Manufacturer#3 almond antique chartreuse khaki white 17 +Manufacturer#3 almond antique forest lavender goldenrod 14 +Manufacturer#3 almond antique metallic orange dim 19 +Manufacturer#3 almond antique misty red olive 1 +Manufacturer#3 almond antique olive coral navajo 45 +Manufacturer#4 almond antique gainsboro frosted violet 10 +Manufacturer#4 almond antique violet mint lemon 39 +Manufacturer#4 almond aquamarine floral ivory bisque 27 Manufacturer#4 almond aquamarine yellow dodger mint 7 -Manufacturer#1 almond aquamarine pink moccasin thistle 42 +Manufacturer#4 almond azure aquamarine papaya violet 12 Manufacturer#5 almond antique blue firebrick mint 31 Manufacturer#5 almond antique medium spring khaki 6 -Manufacturer#3 almond antique misty red olive 1 -Manufacturer#3 almond antique metallic orange dim 19 Manufacturer#5 almond antique sky peru orange 2 -Manufacturer#3 almond antique chartreuse khaki white 17 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 +Manufacturer#5 almond azure blanched chiffon midnight 23 PREHOOK: query: -- 16. 
testViewAsTableInputToPTF create view IF NOT EXISTS mfgr_price_view as select p_mfgr, p_brand, @@ -4653,9 +4653,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) - Reducer 4 <- Reducer 3 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -5123,10 +5123,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) - Reducer 4 <- Reducer 3 (GROUP SORT, 1) - Reducer 5 <- Reducer 2 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 5 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -5630,9 +5630,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) - Reducer 4 <- Reducer 3 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -5982,10 +5982,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) - Reducer 4 <- Reducer 3 (GROUP SORT, 1) - Reducer 5 <- Reducer 4 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 5 <- Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -6332,9 +6332,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) - Reducer 4 <- Reducer 3 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -6677,10 +6677,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) - Reducer 4 <- Reducer 3 (GROUP SORT, 1) - Reducer 5 <- Reducer 4 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 5 <- Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -7072,9 +7072,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) - Reducer 4 <- Reducer 3 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -7417,9 +7417,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) - Reducer 3 <- Reducer 2 (GROUP SORT, 1) - Reducer 4 <- Reducer 3 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 1) #### A 
masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out index 5b9205b..6d9f1ef 100644 --- ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out @@ -15,7 +15,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out index cd43197..95c6edd 100644 --- ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out @@ -109,22 +109,22 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 20 Data size: 829 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 834 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: to_unix_timestamp(ctimestamp1) (type: bigint), year(ctimestamp1) (type: int), month(ctimestamp1) (type: int), day(ctimestamp1) (type: int), dayofmonth(ctimestamp1) (type: int), weekofyear(ctimestamp1) (type: int), hour(ctimestamp1) (type: int), minute(ctimestamp1) (type: int), second(ctimestamp1) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 20 Data size: 829 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 834 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + - Statistics: Num rows: 20 Data size: 829 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 834 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int) Execution mode: vectorized Reducer 2 @@ -132,10 +132,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 20 Data size: 829 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 834 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 20 Data size: 829 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 834 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -252,22 +252,22 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 Map 
Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 8 Data size: 829 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 834 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: to_unix_timestamp(stimestamp1) (type: bigint), year(stimestamp1) (type: int), month(stimestamp1) (type: int), day(stimestamp1) (type: int), dayofmonth(stimestamp1) (type: int), weekofyear(stimestamp1) (type: int), hour(stimestamp1) (type: int), minute(stimestamp1) (type: int), second(stimestamp1) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 8 Data size: 829 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 834 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + - Statistics: Num rows: 8 Data size: 829 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 834 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int) Execution mode: vectorized Reducer 2 @@ -275,10 +275,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 8 Data size: 829 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 834 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 829 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 834 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -395,22 +395,22 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 5 Data size: 829 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 834 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1)) (type: boolean), (year(ctimestamp1) = year(stimestamp1)) (type: boolean), (month(ctimestamp1) = month(stimestamp1)) (type: boolean), (day(ctimestamp1) = day(stimestamp1)) (type: boolean), (dayofmonth(ctimestamp1) = dayofmonth(stimestamp1)) (type: boolean), (weekofyear(ctimestamp1) = weekofyear(stimestamp1)) (type: boolean), (hour(ctimestamp1) = hour(stimestamp1)) (type: boolean), (minute(ctimestamp1) = minute(stimestamp1)) (type: boolean), (second(ctimestamp1) = second(stimestamp1)) (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 5 Data size: 829 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 834 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + - Statistics: Num rows: 5 Data size: 829 Basic stats: COMPLETE Column stats: NONE + 
Statistics: Num rows: 5 Data size: 834 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean) Execution mode: vectorized Reducer 2 @@ -418,10 +418,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 5 Data size: 829 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 834 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 829 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 834 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -542,7 +542,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -636,18 +636,18 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 20 Data size: 829 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 834 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctimestamp1 (type: timestamp) outputColumnNames: ctimestamp1 - Statistics: Num rows: 20 Data size: 829 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 834 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(ctimestamp1), max(ctimestamp1), count(ctimestamp1), count() mode: hash @@ -721,18 +721,18 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 20 Data size: 829 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 834 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctimestamp1 (type: timestamp) outputColumnNames: ctimestamp1 - Statistics: Num rows: 20 Data size: 829 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 834 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(ctimestamp1) mode: hash @@ -780,7 +780,7 @@ FROM alltypesorc_string POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc_string #### A masked pattern was here #### -1123143.857 +1123143.8570000003 PREHOOK: query: EXPLAIN SELECT avg(ctimestamp1), variance(ctimestamp1), @@ -811,18 +811,18 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 20 Data size: 829 Basic stats: COMPLETE Column stats: NONE + 
Statistics: Num rows: 20 Data size: 834 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctimestamp1 (type: timestamp) outputColumnNames: ctimestamp1 - Statistics: Num rows: 20 Data size: 829 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 834 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(ctimestamp1), variance(ctimestamp1), var_pop(ctimestamp1), var_samp(ctimestamp1), std(ctimestamp1), stddev(ctimestamp1), stddev_pop(ctimestamp1), stddev_samp(ctimestamp1) mode: hash
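Two kinds of golden-file change in this section go beyond the edge renames. First, several result blocks (the union7 counts, the vector_decimal_aggregate aggregates, the vectorized_ptf manufacturer listing) appear regenerated in sorted order: a partition-level sort guarantees nothing about cross-partition row order, so the expected output has to be order-insensitive. Second, sum(ctimestamp1) in vectorized_timestamp_funcs.q.out moves from 1123143.857 to 1123143.8570000003: double addition is not associative, so a different shuffle changes the accumulation order and with it the low bits of the result (the remaining Data size/totalSize deltas look like regenerated statistics). A self-contained illustration of the floating-point effect follows; the values are arbitrary, chosen only to echo the figure above.

public class FpOrderSketch {
  public static void main(String[] args) {
    // The same four doubles, summed in two different groupings.
    double a = 1.0e9, b = -1.0e9, c = 1123143.0, d = 0.857;

    // Adding d to a huge intermediate first rounds away low bits of d...
    double leftToRight = ((a + d) + b) + c;
    // ...while cancelling a and b first keeps d intact.
    double regrouped = (a + b) + (c + d);

    System.out.println(leftToRight); // last digits drift, e.g. 1123143.85700001...
    System.out.println(regrouped);   // prints 1123143.857
  }
}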