diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java
index b2c6cf3..11f4236 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java
@@ -18,23 +18,15 @@
 package org.apache.hadoop.hive.ql.exec.spark;
 
-import java.util.ArrayList;
 import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
-import java.util.Queue;
-import java.util.Set;
 
 import com.google.common.base.Preconditions;
 import org.apache.commons.lang.StringUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
 import org.apache.hadoop.hive.ql.io.merge.MergeFileMapper;
 import org.apache.hadoop.hive.ql.io.merge.MergeFileOutputFormat;
 import org.apache.hadoop.hive.ql.io.merge.MergeFileWork;
@@ -70,7 +62,7 @@
   private Context context;
   private Path scratchDir;
   private SparkReporter sparkReporter;
-  private final Map<BaseWork, BaseWork> cloneToWork;
+  private Map<BaseWork, BaseWork> cloneToWork;
   private final Map<BaseWork, SparkTran> workToTranMap;
   private final Map<BaseWork, SparkTran> workToParentWorkTranMap;
 
@@ -85,7 +77,6 @@ public SparkPlanGenerator(
     this.context = context;
     this.jobConf = jobConf;
     this.scratchDir = scratchDir;
-    this.cloneToWork = new HashMap<BaseWork, BaseWork>();
     this.workToTranMap = new HashMap<BaseWork, SparkTran>();
     this.workToParentWorkTranMap = new HashMap<BaseWork, SparkTran>();
     this.sparkReporter = sparkReporter;
@@ -93,12 +84,10 @@ public SparkPlanGenerator(
 
   public SparkPlan generate(SparkWork sparkWork) throws Exception {
     SparkPlan sparkPlan = new SparkPlan();
-    cloneToWork.clear();
+    cloneToWork = sparkWork.getCloneToWork();
     workToTranMap.clear();
     workToParentWorkTranMap.clear();
 
-    splitSparkWork(sparkWork);
-
     for (BaseWork work : sparkWork.getAllWork()) {
       SparkTran tran;
       if (work instanceof MapWork) {
@@ -159,105 +148,6 @@ private SparkTran generateParentTran(SparkPlan sparkPlan, SparkWork sparkWork, B
     return result;
   }
 
-  private void splitSparkWork(SparkWork sparkWork) {
-    // do a BFS on the sparkWork graph, and look for any work that has more than one child.
-    // If we found such a work, we split it into multiple ones, one for each of its child.
-    Queue<BaseWork> queue = new LinkedList<BaseWork>();
-    Set<BaseWork> visited = new HashSet<BaseWork>();
-    queue.addAll(sparkWork.getRoots());
-    while (!queue.isEmpty()) {
-      BaseWork work = queue.poll();
-      if (!visited.add(work)) {
-        continue;
-      }
-
-      List<BaseWork> childWorks = sparkWork.getChildren(work);
-      // First, add all children of this work into queue, to be processed later.
-      for (BaseWork w : childWorks) {
-        queue.add(w);
-      }
-
-      // Second, check if this work has multiple reduceSinks. If so, do split.
-      splitBaseWork(sparkWork, work, childWorks);
-    }
-  }
-
-  private Set<Operator<?>> getAllReduceSinks(BaseWork work) {
-    Set<Operator<?>> resultSet = work.getAllLeafOperators();
-    Iterator<Operator<?>> it = resultSet.iterator();
-    while (it.hasNext()) {
-      if (!(it.next() instanceof ReduceSinkOperator)) {
-        it.remove();
-      }
-    }
-    return resultSet;
-  }
-
-  // Split work into multiple branches, one for each childWork in childWorks.
-  // It also set up the connection between each parent work and child work.
-  private void splitBaseWork(SparkWork sparkWork, BaseWork parentWork, List<BaseWork> childWorks) {
-    if (getAllReduceSinks(parentWork).size() <= 1) {
-      return;
-    }
-
-    // Grand-parent works - we need to set these to be the parents of the cloned works.
-    List<BaseWork> grandParentWorks = sparkWork.getParents(parentWork);
-    boolean isFirst = true;
-
-    for (BaseWork childWork : childWorks) {
-      BaseWork clonedParentWork = Utilities.cloneBaseWork(parentWork);
-      String childReducerName = childWork.getName();
-      SparkEdgeProperty clonedEdgeProperty = sparkWork.getEdgeProperty(parentWork, childWork);
-
-      // We need to remove those branches that
-      // 1, ended with a ReduceSinkOperator, and
-      // 2, the ReduceSinkOperator's name is not the same as childReducerName.
-      // Also, if the cloned work is not the first, we remove ALL leaf operators except
-      // the corresponding ReduceSinkOperator.
-      for (Operator<?> op : clonedParentWork.getAllLeafOperators()) {
-        if (op instanceof ReduceSinkOperator) {
-          if (!((ReduceSinkOperator)op).getConf().getOutputName().equals(childReducerName)) {
-            removeOpRecursive(op);
-          }
-        } else if (!isFirst) {
-          removeOpRecursive(op);
-        }
-      }
-
-      isFirst = false;
-
-      // Then, we need to set up the graph connection. Especially:
-      // 1, we need to connect this cloned parent work with all the grand-parent works.
-      // 2, we need to connect this cloned parent work with the corresponding child work.
-      sparkWork.add(clonedParentWork);
-      for (BaseWork gpw : grandParentWorks) {
-        sparkWork.connect(gpw, clonedParentWork, sparkWork.getEdgeProperty(gpw, parentWork));
-      }
-      sparkWork.connect(clonedParentWork, childWork, clonedEdgeProperty);
-      cloneToWork.put(clonedParentWork, parentWork);
-    }
-
-    sparkWork.remove(parentWork);
-  }
-
-  // Remove op from all its parents' child list.
-  // Recursively remove any of its parent who only have this op as child.
-  private void removeOpRecursive(Operator<?> operator) {
-    List<Operator<?>> parentOperators = new ArrayList<Operator<?>>();
-    for (Operator<?> op : operator.getParentOperators()) {
-      parentOperators.add(op);
-    }
-    for (Operator<?> parentOperator : parentOperators) {
-      Preconditions.checkArgument(parentOperator.getChildOperators().contains(operator),
-          "AssertionError: parent of " + operator.getName() + " doesn't have it as child.");
-      parentOperator.removeChild(operator);
-      if (parentOperator.getNumChild() == 0) {
-        removeOpRecursive(parentOperator);
-      }
-    }
-  }
-
   private Class<? extends InputFormat> getInputFormat(JobConf jobConf, MapWork mWork) throws HiveException {
     // MergeFileWork is sub-class of MapWork, we don't need to distinguish here
     if (mWork.getInputformat() != null) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SplitSparkWorkResolver.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SplitSparkWorkResolver.java
new file mode 100644
index 0000000..67dda02
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SplitSparkWorkResolver.java
@@ -0,0 +1,185 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.spark;
+
+import com.google.common.base.Preconditions;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.Task;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.exec.spark.SparkTask;
+import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext;
+import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalPlanResolver;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.parse.spark.GenSparkUtils;
+import org.apache.hadoop.hive.ql.plan.BaseWork;
+import org.apache.hadoop.hive.ql.plan.MapWork;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.ReduceWork;
+import org.apache.hadoop.hive.ql.plan.SparkEdgeProperty;
+import org.apache.hadoop.hive.ql.plan.SparkWork;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Queue;
+import java.util.Set;
+
+/**
+ * Do a BFS on the sparkWork graph and look for any work that has more than one child.
+ * If such a work is found, split it into multiple works, one for each of its children.
+ */
+public class SplitSparkWorkResolver implements PhysicalPlanResolver {
+  @Override
+  public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException {
+    for (Task<? extends Serializable> task : pctx.getRootTasks()) {
+      if (task instanceof SparkTask) {
+        splitSparkWork(((SparkTask) task).getWork());
+      }
+    }
+    return pctx;
+  }
+
+  private void splitSparkWork(SparkWork sparkWork) {
+    Queue<BaseWork> queue = new LinkedList<BaseWork>();
+    Set<BaseWork> visited = new HashSet<BaseWork>();
+    queue.addAll(sparkWork.getRoots());
+    while (!queue.isEmpty()) {
+      BaseWork work = queue.poll();
+      if (!visited.add(work)) {
+        continue;
+      }
+
+      List<BaseWork> childWorks = sparkWork.getChildren(work);
+      // First, add all children of this work into the queue, to be processed later.
+      for (BaseWork w : childWorks) {
+        queue.add(w);
+      }
+
+      // Second, check if this work has multiple reduceSinks. If so, split it.
+      splitBaseWork(sparkWork, work, childWorks);
+    }
+  }
+
+  // Split the work into multiple branches, one for each childWork in childWorks.
+  // This also sets up the connections between each parent work and child work.
+  private void splitBaseWork(SparkWork sparkWork, BaseWork parentWork, List<BaseWork> childWorks) {
+    if (getAllReduceSinks(parentWork).size() <= 1) {
+      return;
+    }
+
+    // Grand-parent works - we need to set these to be the parents of the cloned works.
+    List<BaseWork> grandParentWorks = sparkWork.getParents(parentWork);
+    boolean isFirst = true;
+
+    for (BaseWork childWork : childWorks) {
+      BaseWork clonedParentWork = Utilities.cloneBaseWork(parentWork);
+      // Give the cloned work a different name.
+      clonedParentWork.setName(clonedParentWork.getName().replaceAll("^([a-zA-Z]+)(\\s+)(\\d+)",
+          "$1$2" + GenSparkUtils.getUtils().getNextSeqNumber()));
+      setStatistics(parentWork, clonedParentWork);
+      String childReducerName = childWork.getName();
+      SparkEdgeProperty clonedEdgeProperty = sparkWork.getEdgeProperty(parentWork, childWork);
+
+      // We need to remove those branches that
+      // 1. end with a ReduceSinkOperator, and
+      // 2. the ReduceSinkOperator's output name is not the same as childReducerName.
+      // Also, if the cloned work is not the first, we remove ALL leaf operators except
+      // the corresponding ReduceSinkOperator.
+      for (Operator<?> op : clonedParentWork.getAllLeafOperators()) {
+        if (op instanceof ReduceSinkOperator) {
+          if (!((ReduceSinkOperator) op).getConf().getOutputName().equals(childReducerName)) {
+            removeOpRecursive(op);
+          }
+        } else if (!isFirst) {
+          removeOpRecursive(op);
+        }
+      }
+
+      isFirst = false;
+
+      // Then, we need to set up the graph connections. Specifically:
+      // 1. connect this cloned parent work with all the grand-parent works, and
+      // 2. connect this cloned parent work with the corresponding child work.
+      sparkWork.add(clonedParentWork);
+      for (BaseWork gpw : grandParentWorks) {
+        sparkWork.connect(gpw, clonedParentWork, sparkWork.getEdgeProperty(gpw, parentWork));
+      }
+      sparkWork.connect(clonedParentWork, childWork, clonedEdgeProperty);
+      sparkWork.getCloneToWork().put(clonedParentWork, parentWork);
+    }
+
+    sparkWork.remove(parentWork);
+  }
+
+  private Set<Operator<?>> getAllReduceSinks(BaseWork work) {
+    Set<Operator<?>> resultSet = work.getAllLeafOperators();
+    Iterator<Operator<?>> it = resultSet.iterator();
+    while (it.hasNext()) {
+      if (!(it.next() instanceof ReduceSinkOperator)) {
+        it.remove();
+      }
+    }
+    return resultSet;
+  }
+
+  // Remove op from all its parents' child lists.
+  // Recursively remove any parent that has only this op as a child.
+  private void removeOpRecursive(Operator<?> operator) {
+    List<Operator<?>> parentOperators = new ArrayList<Operator<?>>();
+    for (Operator<?> op : operator.getParentOperators()) {
+      parentOperators.add(op);
+    }
+    for (Operator<?> parentOperator : parentOperators) {
+      Preconditions.checkArgument(parentOperator.getChildOperators().contains(operator),
+          "AssertionError: parent of " + operator.getName() + " doesn't have it as child.");
+      parentOperator.removeChild(operator);
+      if (parentOperator.getNumChild() == 0) {
+        removeOpRecursive(parentOperator);
+      }
+    }
+  }
+
+  // We lost statistics & opTraits through cloning; try to get them back.
+  // TODO: make sure this method is sufficient to solve the problem.
+  private void setStatistics(BaseWork origin, BaseWork clone) {
+    if (origin instanceof MapWork && clone instanceof MapWork) {
+      MapWork originMW = (MapWork) origin;
+      MapWork cloneMW = (MapWork) clone;
+      for (Map.Entry<String, Operator<? extends OperatorDesc>> entry :
+          originMW.getAliasToWork().entrySet()) {
+        String alias = entry.getKey();
+        Operator<? extends OperatorDesc> cloneOP = cloneMW.getAliasToWork().get(alias);
+        if (cloneOP != null) {
+          setStatistics(entry.getValue(), cloneOP);
+        }
+      }
+    } else if (origin instanceof ReduceWork && clone instanceof ReduceWork) {
+      setStatistics(((ReduceWork) origin).getReducer(), ((ReduceWork) clone).getReducer());
+    }
+  }
+
+  private void setStatistics(Operator<? extends OperatorDesc> origin,
+      Operator<? extends OperatorDesc> clone) {
+    clone.getConf().setStatistics(origin.getConf().getStatistics());
+    clone.getConf().setOpTraits(origin.getConf().getOpTraits());
+    if (origin.getChildOperators().size() == clone.getChildOperators().size()) {
+      for (int i = 0; i < clone.getChildOperators().size(); i++) {
+        setStatistics(origin.getChildOperators().get(i), clone.getChildOperators().get(i));
+      }
+    }
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java
index f6ca068..5a01ffa 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java
@@ -410,4 +410,8 @@ private static boolean isSame(List<ExprNodeDesc> list1, List<ExprNodeDesc> list2
     }
     return null;
   }
+
+  public synchronized int getNextSeqNumber() {
+    return ++sequenceNumber;
+  }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java
index 11e711e..a76cac5 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java
@@ -62,6 +62,7 @@
 import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer;
 import org.apache.hadoop.hive.ql.optimizer.spark.SetSparkReducerParallelism;
 import org.apache.hadoop.hive.ql.optimizer.spark.SparkSortMergeJoinFactory;
+import org.apache.hadoop.hive.ql.optimizer.spark.SplitSparkWorkResolver;
 import org.apache.hadoop.hive.ql.parse.GlobalLimitCtx;
 import org.apache.hadoop.hive.ql.parse.ParseContext;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
@@ -262,6 +263,8 @@ protected void optimizeTaskPlan(List<Task<? extends Serializable>> rootTasks, Pa
     PhysicalContext physicalCtx = new PhysicalContext(conf, pCtx, pCtx.getContext(),
         rootTasks, pCtx.getFetchTask());
 
+    physicalCtx = new SplitSparkWorkResolver().resolve(physicalCtx);
+
     if (conf.getBoolVar(HiveConf.ConfVars.HIVENULLSCANOPTIMIZE)) {
       physicalCtx = new NullScanOptimizer().resolve(physicalCtx);
     } else {
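Review note: the BFS-and-split logic above is easier to see on a toy graph. The sketch below is illustrative only; plain strings stand in for BaseWork, a plain adjacency map stands in for SparkWork's work graph, and the class and method names (SplitWorkDemo, connect, split) are invented. It reuses the resolver's renaming regex, and its output mirrors the edge changes in the groupby10.q.out update below, where Reducer 2 is replaced by Reducer 5 and Reducer 6.

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;

public class SplitWorkDemo {
  // Adjacency list keyed by work name; a stand-in for SparkWork's work graph.
  private final Map<String, List<String>> children = new HashMap<String, List<String>>();
  private int nextSeq = 5; // stand-in for GenSparkUtils.getUtils().getNextSeqNumber()

  void connect(String parent, String child) {
    children.computeIfAbsent(parent, k -> new ArrayList<String>()).add(child);
  }

  // BFS from the roots; whenever a work has more than one child, replace it
  // with one renamed clone per child, as splitSparkWork/splitBaseWork do.
  void split(List<String> roots) {
    Queue<String> queue = new ArrayDeque<String>(roots);
    Set<String> visited = new HashSet<String>();
    while (!queue.isEmpty()) {
      String work = queue.poll();
      if (!visited.add(work)) {
        continue; // a work reachable through two parents is processed only once
      }
      List<String> childWorks = children.getOrDefault(work, new ArrayList<String>());
      queue.addAll(childWorks);
      if (childWorks.size() <= 1) {
        continue;
      }
      for (String child : childWorks) {
        // Same renaming regex as the resolver: swap in a fresh sequence number.
        String clone = work.replaceAll("^([a-zA-Z]+)(\\s+)(\\d+)", "$1$2" + nextSeq++);
        connect(clone, child); // each clone keeps only its own child branch
      }
      children.remove(work); // the original multi-child work is dropped
    }
  }

  public static void main(String[] args) {
    SplitWorkDemo demo = new SplitWorkDemo();
    demo.connect("Reducer 2", "Reducer 3");
    demo.connect("Reducer 2", "Reducer 4");
    demo.split(Arrays.asList("Reducer 2"));
    // Prints (map order aside): {Reducer 5=[Reducer 3], Reducer 6=[Reducer 4]}
    System.out.println(demo.children);
  }
}

The visited set matters because a work in a DAG can be reached through several parents; without it the same work could be split twice.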
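Review note: removeOpRecursive prunes a dead branch upward until it reaches the shared fork (for example, the Forward operator in the multi-insert plans below). A toy version of that pruning, with Node as an invented stand-in for Hive's Operator and its parent/child lists:

import java.util.ArrayList;
import java.util.List;

public class PruneDemo {
  // Invented stand-in for Hive's Operator: just named parent/child lists.
  static class Node {
    final String name;
    final List<Node> parents = new ArrayList<Node>();
    final List<Node> children = new ArrayList<Node>();
    Node(String name) { this.name = name; }
  }

  static void link(Node parent, Node child) {
    parent.children.add(child);
    child.parents.add(parent);
  }

  // Mirror of removeOpRecursive: detach node from every parent, and remove
  // any parent left childless, so a dead branch vanishes up to the fork.
  static void removeRecursive(Node node) {
    for (Node parent : new ArrayList<Node>(node.parents)) {
      if (!parent.children.contains(node)) {
        throw new IllegalStateException("parent of " + node.name + " doesn't have it as child");
      }
      parent.children.remove(node);
      if (parent.children.isEmpty()) {
        removeRecursive(parent);
      }
    }
  }

  public static void main(String[] args) {
    // A Forward operator forks into two group-by branches, each ending in a
    // reduce sink, like the multi-insert plans in the golden files below.
    Node fwd = new Node("FWD");
    Node gby1 = new Node("GBY1"), rs1 = new Node("RS1");
    Node gby2 = new Node("GBY2"), rs2 = new Node("RS2");
    link(fwd, gby1); link(gby1, rs1);
    link(fwd, gby2); link(gby2, rs2);

    removeRecursive(rs2); // prune the branch that belongs to the other clone
    System.out.println(fwd.children.size()); // prints 1: only the GBY1 branch survives
  }
}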
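Review note: setStatistics compensates for Utilities.cloneBaseWork dropping per-operator Statistics and OpTraits by walking the original and cloned operator trees in lockstep. A hypothetical minimal model of that lockstep copy, with a single String field standing in for both kinds of metadata:

import java.util.ArrayList;
import java.util.List;

public class StatsCopyDemo {
  // Invented stand-in for an operator: metadata plus an ordered child list.
  static class Op {
    String stats; // stands in for both Statistics and OpTraits
    final List<Op> children = new ArrayList<Op>();
  }

  // Lockstep walk, as in setStatistics: copy metadata node by node, but only
  // descend while the two trees still have the same shape.
  static void copyStats(Op origin, Op clone) {
    clone.stats = origin.stats;
    if (origin.children.size() == clone.children.size()) {
      for (int i = 0; i < clone.children.size(); i++) {
        copyStats(origin.children.get(i), clone.children.get(i));
      }
    }
  }

  public static void main(String[] args) {
    Op origin = new Op();
    origin.stats = "Num rows: 2 Data size: 280";
    Op originChild = new Op();
    originChild.stats = "Num rows: 2 Data size: 280";
    origin.children.add(originChild);

    Op clone = new Op();           // same shape as origin, metadata missing
    clone.children.add(new Op());
    copyStats(origin, clone);
    System.out.println(clone.children.get(0).stats); // prints: Num rows: 2 Data size: 280
  }
}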
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java
index 351d533..d2a1c0f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java
@@ -49,15 +49,20 @@
 
   private final Set<BaseWork> roots = new HashSet<BaseWork>();
   private final Set<BaseWork> leaves = new HashSet<BaseWork>();
 
-  protected final Map<BaseWork, List<BaseWork>> workGraph = new HashMap<BaseWork, List<BaseWork>>();
-  protected final Map<BaseWork, List<BaseWork>> invertedWorkGraph = new HashMap<BaseWork, List<BaseWork>>();
+  protected final Map<BaseWork, List<BaseWork>> workGraph =
+      new HashMap<BaseWork, List<BaseWork>>();
+  protected final Map<BaseWork, List<BaseWork>> invertedWorkGraph =
+      new HashMap<BaseWork, List<BaseWork>>();
   protected final Map<Pair<BaseWork, BaseWork>, SparkEdgeProperty> edgeProperties =
       new HashMap<Pair<BaseWork, BaseWork>, SparkEdgeProperty>();
 
   private Map<String, List<String>> requiredCounterPrefix;
 
+  private final Map<BaseWork, BaseWork> cloneToWork;
+
   public SparkWork(String name) {
     this.name = name + ":" + (++counter);
+    cloneToWork = new HashMap<BaseWork, BaseWork>();
   }
 
@@ -305,20 +310,25 @@ public int compareTo(Dependency o) {
 
   @Explain(displayName = "Edges")
   public Map<String, List<Dependency>> getDependencyMap() {
     Map<String, List<Dependency>> result = new LinkedHashMap<String, List<Dependency>>();
-    for (Map.Entry<BaseWork, List<BaseWork>> entry: invertedWorkGraph.entrySet()) {
-      List<Dependency> dependencies = new LinkedList<Dependency>();
-      for (BaseWork d: entry.getValue()) {
-        Dependency dependency = new Dependency();
-        dependency.w = d;
-        dependency.prop = getEdgeProperty(d, entry.getKey());
-        dependencies.add(dependency);
-      }
-      if (!dependencies.isEmpty()) {
-        Collections.sort(dependencies);
-        result.put(entry.getKey().getName(), dependencies);
+    for (BaseWork baseWork : getAllWork()) {
+      if (invertedWorkGraph.get(baseWork) != null && invertedWorkGraph.get(baseWork).size() > 0) {
+        List<Dependency> dependencies = new LinkedList<Dependency>();
+        for (BaseWork d : invertedWorkGraph.get(baseWork)) {
+          Dependency dependency = new Dependency();
+          dependency.w = d;
+          dependency.prop = getEdgeProperty(d, baseWork);
+          dependencies.add(dependency);
+        }
+        if (!dependencies.isEmpty()) {
+          Collections.sort(dependencies);
+          result.put(baseWork.getName(), dependencies);
+        }
       }
     }
     return result;
   }
 
+  public Map<BaseWork, BaseWork> getCloneToWork() {
+    return cloneToWork;
+  }
 }
diff --git ql/src/test/results/clientpositive/spark/groupby10.q.out ql/src/test/results/clientpositive/spark/groupby10.q.out
index 561f1dd..60b4d50 100644
--- ql/src/test/results/clientpositive/spark/groupby10.q.out
+++ ql/src/test/results/clientpositive/spark/groupby10.q.out
@@ -56,9 +56,10 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 1 (SORT, 1)
-        Reducer 3 <- Reducer 2 (GROUP, 1)
-        Reducer 4 <- Reducer 2 (GROUP, 1)
+        Reducer 3 <- Reducer 5 (GROUP, 1)
+        Reducer 4 <- Reducer 6 (GROUP, 1)
+        Reducer 5 <- Map 1 (SORT, 1)
+        Reducer 6 <- Map 1 (SORT, 1)
 #### A masked pattern was here ####
       Vertices:
        Map 1 
@@ -72,34 +73,6 @@ STAGE PLANS:
                     Map-reduce partition columns: substr(value, 5) (type: string)
                     Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
                     value expressions: key (type: int)
-        Reducer 2 
-            Reduce Operator Tree:
-              Forward
-                Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: count(KEY._col0), count(DISTINCT KEY._col0)
-                  keys: VALUE._col0 (type: int)
-                  mode: hash
-                  outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: int)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: int)
-                    Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: bigint), _col2 (type: bigint)
-                Group By Operator
-                  aggregations: sum(KEY._col0), sum(DISTINCT KEY._col0)
-                  keys: VALUE._col0 (type: int)
-                  mode: hash
-                  outputColumnNames: _col0, _col1, _col2
-
Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: double) Reducer 3 Reduce Operator Tree: Group By Operator @@ -140,6 +113,38 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Reducer 5 + Reduce Operator Tree: + Forward + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(KEY._col0), count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(KEY._col0), sum(DISTINCT KEY._col0) + keys: VALUE._col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: double) Stage: Stage-3 Dependency Collection @@ -268,9 +273,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 4 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (SORT, 1) + Reducer 6 <- Map 1 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -284,34 +290,6 @@ STAGE PLANS: Map-reduce partition columns: substr(value, 5) (type: string) Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: int) - Reducer 2 - Reduce Operator Tree: - Forward - Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(KEY._col0), count(DISTINCT KEY._col0) - keys: VALUE._col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: bigint) - Group By Operator - aggregations: sum(KEY._col0), sum(DISTINCT KEY._col0) - keys: VALUE._col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 280 Basic 
stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: double) Reducer 3 Reduce Operator Tree: Group By Operator @@ -352,6 +330,38 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Reducer 5 + Reduce Operator Tree: + Forward + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(KEY._col0), count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(KEY._col0), sum(DISTINCT KEY._col0) + keys: VALUE._col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: double) Stage: Stage-3 Dependency Collection @@ -482,9 +492,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 4 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (SORT, 1) + Reducer 6 <- Map 1 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -498,34 +509,6 @@ STAGE PLANS: Map-reduce partition columns: substr(value, 5) (type: string) Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: int) - Reducer 2 - Reduce Operator Tree: - Forward - Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(DISTINCT KEY._col0), count(DISTINCT KEY._col0) - keys: VALUE._col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: bigint) - Group By Operator - aggregations: sum(DISTINCT KEY._col0), avg(DISTINCT KEY._col0) - keys: VALUE._col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: struct) Reducer 3 Reduce Operator Tree: Group By Operator @@ -566,6 +549,38 @@ STAGE PLANS: output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Reducer 5 + Reduce Operator Tree: + Forward + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(DISTINCT KEY._col0), count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: bigint) + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(DISTINCT KEY._col0), avg(DISTINCT KEY._col0) + keys: VALUE._col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: struct) Stage: Stage-3 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/groupby11.q.out ql/src/test/results/clientpositive/spark/groupby11.q.out index 41967e5..445bb52 100644 --- ql/src/test/results/clientpositive/spark/groupby11.q.out +++ ql/src/test/results/clientpositive/spark/groupby11.q.out @@ -44,9 +44,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 4 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (SORT, 1) + Reducer 6 <- Map 1 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -60,34 +61,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string), substr(value, 5) (type: string) - Reducer 2 - Reduce Operator Tree: - Forward - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(KEY._col0), count(DISTINCT KEY._col0) - keys: VALUE._col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: bigint) - Group By Operator - aggregations: count(KEY._col0), count(DISTINCT KEY._col0) - keys: VALUE._col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: bigint) 
Reducer 3 Reduce Operator Tree: Group By Operator @@ -128,6 +101,38 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Reducer 5 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(KEY._col0), count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(KEY._col0), count(DISTINCT KEY._col0) + keys: VALUE._col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) Stage: Stage-3 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/groupby7_map.q.out ql/src/test/results/clientpositive/spark/groupby7_map.q.out index 9a3460f..34fd5b2 100644 --- ql/src/test/results/clientpositive/spark/groupby7_map.q.out +++ ql/src/test/results/clientpositive/spark/groupby7_map.q.out @@ -40,11 +40,11 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 31) - Reducer 3 <- Map 1 (GROUP, 31) + Reducer 2 <- Map 4 (GROUP, 31) + Reducer 3 <- Map 5 (GROUP, 31) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: src @@ -65,6 +65,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double) + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: key, value diff --git ql/src/test/results/clientpositive/spark/groupby7_map_skew.q.out ql/src/test/results/clientpositive/spark/groupby7_map_skew.q.out index 2e43287..b76a002 100644 --- ql/src/test/results/clientpositive/spark/groupby7_map_skew.q.out +++ ql/src/test/results/clientpositive/spark/groupby7_map_skew.q.out @@ -40,13 +40,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 31) Reducer 3 <- Reducer 2 (GROUP, 31) - Reducer 4 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 31) Reducer 5 <- Reducer 4 (GROUP, 31) #### A masked pattern was here #### Vertices: - Map 1 + Map 6 Map Operator Tree: TableScan alias: src @@ -67,6 +67,11 @@ STAGE PLANS: Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column 
stats: NONE value expressions: _col1 (type: double) + Map 7 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: key, value diff --git ql/src/test/results/clientpositive/spark/groupby7_noskew.q.out ql/src/test/results/clientpositive/spark/groupby7_noskew.q.out index ccfdf39..7f8bbfe 100644 --- ql/src/test/results/clientpositive/spark/groupby7_noskew.q.out +++ ql/src/test/results/clientpositive/spark/groupby7_noskew.q.out @@ -40,11 +40,11 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 31) - Reducer 3 <- Map 1 (GROUP, 31) + Reducer 2 <- Map 4 (GROUP, 31) + Reducer 3 <- Map 5 (GROUP, 31) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: src @@ -59,6 +59,11 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: substr(value, 5) (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: key, value diff --git ql/src/test/results/clientpositive/spark/groupby7_noskew_multi_single_reducer.q.out ql/src/test/results/clientpositive/spark/groupby7_noskew_multi_single_reducer.q.out index 396c6a4..085a2ac 100644 --- ql/src/test/results/clientpositive/spark/groupby7_noskew_multi_single_reducer.q.out +++ ql/src/test/results/clientpositive/spark/groupby7_noskew_multi_single_reducer.q.out @@ -40,9 +40,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (SORT, 31) - Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 4 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (SORT, 31) + Reducer 6 <- Map 1 (SORT, 31) #### A masked pattern was here #### Vertices: Map 1 @@ -60,44 +61,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: substr(value, 5) (type: string) - Reducer 2 - Reduce Operator Tree: - Forward - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: double) - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: 
NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: double) Reducer 3 Reduce Operator Tree: Select Operator @@ -140,6 +103,48 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Reducer 5 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: double) + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: double) Stage: Stage-3 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/groupby8.q.out ql/src/test/results/clientpositive/spark/groupby8.q.out index d7c5b6d..33d7d11 100644 --- ql/src/test/results/clientpositive/spark/groupby8.q.out +++ ql/src/test/results/clientpositive/spark/groupby8.q.out @@ -40,9 +40,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 4 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (SORT, 1) + Reducer 6 <- Map 1 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -56,34 +57,6 @@ STAGE PLANS: Map-reduce partition columns: substr(value, 5) (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: string) - Reducer 2 - Reduce Operator Tree: - Forward - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(DISTINCT KEY._col0) - keys: VALUE._col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - 
Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Group By Operator - aggregations: count(DISTINCT KEY._col0) - keys: VALUE._col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -124,6 +97,38 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Reducer 5 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Stage: Stage-3 Dependency Collection @@ -828,9 +833,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 4 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (SORT, 1) + Reducer 6 <- Map 1 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -844,34 +850,6 @@ STAGE PLANS: Map-reduce partition columns: substr(value, 5) (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: string) - Reducer 2 - Reduce Operator Tree: - Forward - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(DISTINCT KEY._col0) - keys: VALUE._col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Group By Operator - aggregations: count(DISTINCT KEY._col0) - keys: VALUE._col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 
5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -912,6 +890,38 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Reducer 5 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Stage: Stage-3 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/groupby8_map.q.out ql/src/test/results/clientpositive/spark/groupby8_map.q.out index 9d491be..8beb293 100644 --- ql/src/test/results/clientpositive/spark/groupby8_map.q.out +++ ql/src/test/results/clientpositive/spark/groupby8_map.q.out @@ -40,9 +40,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (SORT, 31) - Reducer 3 <- Reducer 2 (GROUP, 31) - Reducer 4 <- Reducer 2 (GROUP, 31) + Reducer 3 <- Reducer 5 (GROUP, 31) + Reducer 4 <- Reducer 6 (GROUP, 31) + Reducer 5 <- Map 1 (SORT, 31) + Reducer 6 <- Map 1 (SORT, 31) #### A masked pattern was here #### Vertices: Map 1 @@ -56,34 +57,6 @@ STAGE PLANS: Map-reduce partition columns: substr(value, 5) (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: string) - Reducer 2 - Reduce Operator Tree: - Forward - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(DISTINCT KEY._col0) - keys: VALUE._col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Group By Operator - aggregations: count(DISTINCT KEY._col0) - keys: VALUE._col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output 
Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -124,6 +97,38 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Reducer 5 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Stage: Stage-3 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/groupby8_map_skew.q.out ql/src/test/results/clientpositive/spark/groupby8_map_skew.q.out index 9d491be..8beb293 100644 --- ql/src/test/results/clientpositive/spark/groupby8_map_skew.q.out +++ ql/src/test/results/clientpositive/spark/groupby8_map_skew.q.out @@ -40,9 +40,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (SORT, 31) - Reducer 3 <- Reducer 2 (GROUP, 31) - Reducer 4 <- Reducer 2 (GROUP, 31) + Reducer 3 <- Reducer 5 (GROUP, 31) + Reducer 4 <- Reducer 6 (GROUP, 31) + Reducer 5 <- Map 1 (SORT, 31) + Reducer 6 <- Map 1 (SORT, 31) #### A masked pattern was here #### Vertices: Map 1 @@ -56,34 +57,6 @@ STAGE PLANS: Map-reduce partition columns: substr(value, 5) (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: string) - Reducer 2 - Reduce Operator Tree: - Forward - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(DISTINCT KEY._col0) - keys: VALUE._col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Group By Operator - aggregations: count(DISTINCT KEY._col0) - keys: VALUE._col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) 
- sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -124,6 +97,38 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Reducer 5 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Stage: Stage-3 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/groupby8_noskew.q.out ql/src/test/results/clientpositive/spark/groupby8_noskew.q.out index 9d491be..8beb293 100644 --- ql/src/test/results/clientpositive/spark/groupby8_noskew.q.out +++ ql/src/test/results/clientpositive/spark/groupby8_noskew.q.out @@ -40,9 +40,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (SORT, 31) - Reducer 3 <- Reducer 2 (GROUP, 31) - Reducer 4 <- Reducer 2 (GROUP, 31) + Reducer 3 <- Reducer 5 (GROUP, 31) + Reducer 4 <- Reducer 6 (GROUP, 31) + Reducer 5 <- Map 1 (SORT, 31) + Reducer 6 <- Map 1 (SORT, 31) #### A masked pattern was here #### Vertices: Map 1 @@ -56,34 +57,6 @@ STAGE PLANS: Map-reduce partition columns: substr(value, 5) (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: string) - Reducer 2 - Reduce Operator Tree: - Forward - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(DISTINCT KEY._col0) - keys: VALUE._col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Group By Operator - aggregations: count(DISTINCT KEY._col0) - keys: VALUE._col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 
(type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -124,6 +97,38 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Reducer 5 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Stage: Stage-3 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/groupby9.q.out ql/src/test/results/clientpositive/spark/groupby9.q.out index de795a0..ea218bf 100644 --- ql/src/test/results/clientpositive/spark/groupby9.q.out +++ ql/src/test/results/clientpositive/spark/groupby9.q.out @@ -40,9 +40,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 4 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (SORT, 1) + Reducer 6 <- Map 1 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -56,34 +57,6 @@ STAGE PLANS: Map-reduce partition columns: substr(value, 5) (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: string), value (type: string) - Reducer 2 - Reduce Operator Tree: - Forward - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(DISTINCT KEY._col0) - keys: VALUE._col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Group By Operator - aggregations: count(DISTINCT KEY._col0) - keys: VALUE._col0 (type: string), VALUE._col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: 
-                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col2 (type: bigint)
         Reducer 3 
             Reduce Operator Tree:
               Group By Operator
@@ -124,6 +97,38 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.dest2
+        Reducer 5 
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(DISTINCT KEY._col0)
+                  keys: VALUE._col0 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: string)
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col1 (type: bigint)
+        Reducer 6 
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(DISTINCT KEY._col0)
+                  keys: VALUE._col0 (type: string), VALUE._col1 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string), _col1 (type: string)
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col2 (type: bigint)
 
   Stage: Stage-3
     Dependency Collection
@@ -829,9 +834,10 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 1 (SORT, 1)
-        Reducer 3 <- Reducer 2 (GROUP, 1)
-        Reducer 4 <- Reducer 2 (GROUP, 1)
+        Reducer 3 <- Reducer 5 (GROUP, 1)
+        Reducer 4 <- Reducer 6 (GROUP, 1)
+        Reducer 5 <- Map 1 (SORT, 1)
+        Reducer 6 <- Map 1 (SORT, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -845,34 +851,6 @@ STAGE PLANS:
                     Map-reduce partition columns: substr(value, 5) (type: string)
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                     value expressions: key (type: string), value (type: string)
-        Reducer 2 
-            Reduce Operator Tree:
-              Forward
-                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: count(DISTINCT KEY._col0)
-                  keys: VALUE._col0 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
-                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: bigint)
-                Group By Operator
-                  aggregations: count(DISTINCT KEY._col0)
-                  keys: VALUE._col1 (type: string), VALUE._col0 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string), _col1 (type: string)
-                    sort order: ++
-                    Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
-                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col2 (type: bigint)
         Reducer 3 
             Reduce Operator Tree:
               Group By Operator
@@ -913,6 +891,38 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.dest2
+        Reducer 5 
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(DISTINCT KEY._col0)
+                  keys: VALUE._col0 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: string)
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col1 (type: bigint)
+        Reducer 6 
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(DISTINCT KEY._col0)
+                  keys: VALUE._col1 (type: string), VALUE._col0 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string), _col1 (type: string)
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col2 (type: bigint)
 
   Stage: Stage-3
     Dependency Collection
@@ -1618,9 +1628,10 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 1 (SORT, 1)
-        Reducer 3 <- Reducer 2 (GROUP, 1)
-        Reducer 4 <- Reducer 2 (GROUP, 1)
+        Reducer 3 <- Reducer 5 (GROUP, 1)
+        Reducer 4 <- Reducer 6 (GROUP, 1)
+        Reducer 5 <- Map 1 (SORT, 1)
+        Reducer 6 <- Map 1 (SORT, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -1634,34 +1645,6 @@ STAGE PLANS:
                     Map-reduce partition columns: substr(value, 5) (type: string)
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                     value expressions: key (type: string), value (type: string)
-        Reducer 2 
-            Reduce Operator Tree:
-              Forward
-                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: count(DISTINCT KEY._col0)
-                  keys: VALUE._col0 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
-                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: bigint)
-                Group By Operator
-                  aggregations: count(DISTINCT KEY._col0)
-                  keys: VALUE._col0 (type: string), VALUE._col1 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string), _col1 (type: string)
-                    sort order: ++
-                    Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
-                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col2 (type: bigint)
         Reducer 3 
             Reduce Operator Tree:
               Group By Operator
@@ -1702,6 +1685,38 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.dest2
+        Reducer 5 
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(DISTINCT KEY._col0)
+                  keys: VALUE._col0 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: string)
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col1 (type: bigint)
+        Reducer 6 
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(DISTINCT KEY._col0)
+                  keys: VALUE._col0 (type: string), VALUE._col1 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string), _col1 (type: string)
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col2 (type: bigint)
 
   Stage: Stage-3
     Dependency Collection
@@ -2407,11 +2422,11 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 1 (GROUP, 1)
-        Reducer 3 <- Map 1 (GROUP, 1)
+        Reducer 2 <- Map 4 (GROUP, 1)
+        Reducer 3 <- Map 5 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
-        Map 1 
+        Map 4 
             Map Operator Tree:
                 TableScan
                   alias: src
@@ -2432,6 +2447,11 @@ STAGE PLANS:
                     Map-reduce partition columns: _col0 (type: string)
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col1 (type: bigint)
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: key (type: string), value (type: string)
                     outputColumnNames: key, value
@@ -3193,9 +3213,10 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 1 (SORT, 1)
-        Reducer 3 <- Reducer 2 (GROUP, 1)
-        Reducer 4 <- Reducer 2 (GROUP, 1)
+        Reducer 3 <- Reducer 5 (GROUP, 1)
+        Reducer 4 <- Reducer 6 (GROUP, 1)
+        Reducer 5 <- Map 1 (SORT, 1)
+        Reducer 6 <- Map 1 (SORT, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -3209,34 +3230,6 @@ STAGE PLANS:
                     Map-reduce partition columns: substr(value, 5) (type: string)
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                     value expressions: key (type: string), value (type: string)
-        Reducer 2 
-            Reduce Operator Tree:
-              Forward
-                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: count(DISTINCT KEY._col0)
-                  keys: VALUE._col0 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
-                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: bigint)
-                Group By Operator
-                  aggregations: count(DISTINCT KEY._col0)
-                  keys: VALUE._col1 (type: string), VALUE._col0 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string), _col1 (type: string)
-                    sort order: ++
-                    Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
-                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col2 (type: bigint)
         Reducer 3 
             Reduce Operator Tree:
               Group By Operator
@@ -3277,6 +3270,38 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.dest2
+        Reducer 5 
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(DISTINCT KEY._col0)
+                  keys: VALUE._col0 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: string)
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col1 (type: bigint)
+        Reducer 6 
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(DISTINCT KEY._col0)
+                  keys: VALUE._col1 (type: string), VALUE._col0 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string), _col1 (type: string)
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col2 (type: bigint)
 
   Stage: Stage-3
     Dependency Collection
diff --git ql/src/test/results/clientpositive/spark/groupby_complex_types.q.out ql/src/test/results/clientpositive/spark/groupby_complex_types.q.out
index 7585064..c2c9082 100644
--- ql/src/test/results/clientpositive/spark/groupby_complex_types.q.out
+++ ql/src/test/results/clientpositive/spark/groupby_complex_types.q.out
@@ -52,12 +52,12 @@ STAGE PLANS:
   Stage: Stage-3
     Spark
      Edges:
-        Reducer 2 <- Map 1 (GROUP, 1)
-        Reducer 3 <- Map 1 (GROUP, 1)
-        Reducer 4 <- Map 1 (GROUP, 1)
+        Reducer 2 <- Map 5 (GROUP, 1)
+        Reducer 3 <- Map 6 (GROUP, 1)
+        Reducer 4 <- Map 7 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
-        Map 1 
+        Map 5 
             Map Operator Tree:
                 TableScan
                   alias: src
@@ -78,6 +78,11 @@ STAGE PLANS:
                     Map-reduce partition columns: _col0 (type: array<string>)
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col1 (type: bigint)
+        Map 6 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: key (type: string), value (type: string)
                     outputColumnNames: key, value
@@ -94,6 +99,11 @@ STAGE PLANS:
                     Map-reduce partition columns: _col0 (type: map<string,string>)
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col1 (type: bigint)
+        Map 7 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: key (type: string), value (type: string)
                     outputColumnNames: key, value
diff --git ql/src/test/results/clientpositive/spark/groupby_complex_types_multi_single_reducer.q.out ql/src/test/results/clientpositive/spark/groupby_complex_types_multi_single_reducer.q.out
index 7e4fa35..f5a1c6e 100644
--- ql/src/test/results/clientpositive/spark/groupby_complex_types_multi_single_reducer.q.out
+++ ql/src/test/results/clientpositive/spark/groupby_complex_types_multi_single_reducer.q.out
@@ -40,13 +40,13 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 1 (GROUP, 1)
+        Reducer 2 <- Map 6 (GROUP, 1)
         Reducer 3 <- Reducer 2 (GROUP, 1)
-        Reducer 4 <- Map 1 (GROUP, 1)
+        Reducer 4 <- Map 7 (GROUP, 1)
         Reducer 5 <- Reducer 4 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
-        Map 1 
+        Map 6 
             Map Operator Tree:
                 TableScan
                   alias: src
@@ -67,6 +67,11 @@ STAGE PLANS:
                     Map-reduce partition columns: _col0 (type: array<string>)
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col1 (type: bigint)
+        Map 7 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: key (type: string), value (type: string)
                     outputColumnNames: key, value
diff --git ql/src/test/results/clientpositive/spark/groupby_cube1.q.out ql/src/test/results/clientpositive/spark/groupby_cube1.q.out
index 0d0f922..443af21 100644
--- ql/src/test/results/clientpositive/spark/groupby_cube1.q.out
+++ ql/src/test/results/clientpositive/spark/groupby_cube1.q.out
@@ -405,13 +405,13 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
+        Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 1)
         Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1)
-        Reducer 4 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
+        Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 1)
         Reducer 5 <- Reducer 4 (GROUP PARTITION-LEVEL SORT, 1)
 #### A masked pattern was here ####
       Vertices:
-        Map 1 
+        Map 6 
             Map Operator Tree:
                 TableScan
                   alias: t1
@@ -432,6 +432,11 @@ STAGE PLANS:
                     Map-reduce partition columns: rand() (type: double)
                     Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                     value expressions: _col3 (type: bigint)
+        Map 7 
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
                   Select Operator
                     expressions: key (type: string), val (type: string)
                     outputColumnNames: key, val
diff --git ql/src/test/results/clientpositive/spark/groupby_multi_insert_common_distinct.q.out ql/src/test/results/clientpositive/spark/groupby_multi_insert_common_distinct.q.out
index c1fd88d..182478b 100644
--- ql/src/test/results/clientpositive/spark/groupby_multi_insert_common_distinct.q.out
+++ ql/src/test/results/clientpositive/spark/groupby_multi_insert_common_distinct.q.out
@@ -40,9 +40,10 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 1 (SORT, 1)
-        Reducer 3 <- Reducer 2 (GROUP, 1)
-        Reducer 4 <- Reducer 2 (GROUP, 1)
+        Reducer 3 <- Reducer 5 (GROUP, 1)
+        Reducer 4 <- Reducer 6 (GROUP, 1)
+        Reducer 5 <- Map 1 (SORT, 1)
+        Reducer 6 <- Map 1 (SORT, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -56,34 +57,6 @@ STAGE PLANS:
                     Map-reduce partition columns: value (type: string)
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                     value expressions: key (type: string), (key + key) (type: double)
-        Reducer 2 
-            Reduce Operator Tree:
-              Forward
-                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: count(DISTINCT KEY._col0)
-                  keys: VALUE._col0 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
-                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: bigint)
-                Group By Operator
-                  aggregations: count(DISTINCT KEY._col0)
-                  keys: VALUE._col1 (type: double)
-                  mode: hash
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: double)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: double)
-                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: bigint)
         Reducer 3 
             Reduce Operator Tree:
               Group By Operator
@@ -124,6 +97,38 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.dest2
+        Reducer 5 
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(DISTINCT KEY._col0)
+                  keys: VALUE._col0 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: string)
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col1 (type: bigint)
+        Reducer 6 
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(DISTINCT KEY._col0)
+                  keys: VALUE._col1 (type: double)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: double)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: double)
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col1 (type: bigint)
 
   Stage: Stage-3
     Dependency Collection
@@ -224,11 +229,11 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
-        Reducer 3 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
+        Reducer 2 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1)
+        Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1)
 #### A masked pattern was here ####
       Vertices:
-        Map 1 
+        Map 4 
             Map Operator Tree:
                 TableScan
                   alias: src
@@ -248,6 +253,11 @@ STAGE PLANS:
                     sort order: ++
                     Map-reduce partition columns: _col0 (type: string)
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: key (type: string), value (type: string)
                     outputColumnNames: key, value
diff --git ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer.q.out ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer.q.out
index 2dab62f..e77ecc3 100644
--- ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer.q.out
+++ ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer.q.out
@@ -304,12 +304,12 @@ STAGE PLANS:
   Stage: Stage-5
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1)
-        Reducer 3 <- Map 1 (PARTITION-LEVEL SORT, 1)
+        Reducer 2 <- Map 5 (PARTITION-LEVEL SORT, 1)
+        Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 1)
         Reducer 4 <- Reducer 3 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
-        Map 1 
+        Map 5 
             Map Operator Tree:
                 TableScan
                   alias: src
@@ -324,6 +324,11 @@ STAGE PLANS:
                     Map-reduce partition columns: substr(key, 1, 1) (type: string)
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                     value expressions: value (type: string)
+        Map 6 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: key (type: string), value (type: string)
                     outputColumnNames: key, value
diff --git ql/src/test/results/clientpositive/spark/groupby_position.q.out ql/src/test/results/clientpositive/spark/groupby_position.q.out
index 697184b..5e044da 100644
--- ql/src/test/results/clientpositive/spark/groupby_position.q.out
+++ ql/src/test/results/clientpositive/spark/groupby_position.q.out
@@ -40,11 +40,11 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
-        Reducer 3 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
+        Reducer 2 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1)
+        Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1)
 #### A masked pattern was here ####
       Vertices:
-        Map 1 
+        Map 4 
             Map Operator Tree:
                 TableScan
                   alias: src
@@ -67,6 +67,11 @@ STAGE PLANS:
                     sort order: ++
                     Map-reduce partition columns: _col0 (type: string)
                     Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: (key < 20) (type: boolean)
                     Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
@@ -238,11 +243,11 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
-        Reducer 3 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
+        Reducer 2 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1)
+        Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1)
 #### A masked pattern was here ####
       Vertices:
-        Map 1 
+        Map 4 
             Map Operator Tree:
                 TableScan
                   alias: src
@@ -265,6 +270,11 @@ STAGE PLANS:
                     sort order: ++
                     Map-reduce partition columns: _col0 (type: string)
                     Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: (key < 20) (type: boolean)
                     Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
diff --git ql/src/test/results/clientpositive/spark/groupby_rollup1.q.out ql/src/test/results/clientpositive/spark/groupby_rollup1.q.out
index eb87c7e..fede4a9 100644
--- ql/src/test/results/clientpositive/spark/groupby_rollup1.q.out
+++ ql/src/test/results/clientpositive/spark/groupby_rollup1.q.out
@@ -393,13 +393,13 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
+        Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 1)
         Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1)
-        Reducer 4 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
+        Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 1)
         Reducer 5 <- Reducer 4 (GROUP PARTITION-LEVEL SORT, 1)
 #### A masked pattern was here ####
       Vertices:
-        Map 1 
+        Map 6 
             Map Operator Tree:
                 TableScan
                   alias: t1
@@ -420,6 +420,11 @@ STAGE PLANS:
                     Map-reduce partition columns: rand() (type: double)
                     Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                     value expressions: _col3 (type: bigint)
+        Map 7 
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
                   Select Operator
                     expressions: key (type: string), val (type: string)
                     outputColumnNames: key, val
diff --git ql/src/test/results/clientpositive/spark/input1_limit.q.out ql/src/test/results/clientpositive/spark/input1_limit.q.out
index e9f8d5e..c305e90 100644
--- ql/src/test/results/clientpositive/spark/input1_limit.q.out
+++ ql/src/test/results/clientpositive/spark/input1_limit.q.out
@@ -40,11 +40,11 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 1 (GROUP, 1)
-        Reducer 3 <- Map 1 (GROUP, 1)
+        Reducer 2 <- Map 4 (GROUP, 1)
+        Reducer 3 <- Map 5 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
-        Map 1 
+        Map 4 
             Map Operator Tree:
                 TableScan
                   alias: src
@@ -63,6 +63,11 @@ STAGE PLANS:
                     sort order: 
                     Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col0 (type: string), _col1 (type: string)
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: (key < 100) (type: boolean)
                     Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
diff --git ql/src/test/results/clientpositive/spark/insert_into3.q.out ql/src/test/results/clientpositive/spark/insert_into3.q.out
index 1313d26..e20306d 100644
--- ql/src/test/results/clientpositive/spark/insert_into3.q.out
+++ ql/src/test/results/clientpositive/spark/insert_into3.q.out
@@ -40,11 +40,11 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 1 (SORT, 1)
-        Reducer 3 <- Map 1 (SORT, 1)
+        Reducer 2 <- Map 4 (SORT, 1)
+        Reducer 3 <- Map 5 (SORT, 1)
 #### A masked pattern was here ####
       Vertices:
-        Map 1 
+        Map 4 
             Map Operator Tree:
                 TableScan
                   alias: src
@@ -57,6 +57,11 @@ STAGE PLANS:
                     key expressions: _col0 (type: string), _col1 (type: string)
                     sort order: ++
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: key (type: string), value (type: string)
                     outputColumnNames: _col0, _col1
@@ -197,11 +202,11 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 1 (GROUP, 1)
-        Reducer 3 <- Map 1 (GROUP, 1)
+        Reducer 2 <- Map 4 (GROUP, 1)
+        Reducer 3 <- Map 5 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
-        Map 1 
+        Map 4 
             Map Operator Tree:
                 TableScan
                   alias: src
@@ -217,6 +222,11 @@ STAGE PLANS:
                     sort order: 
                     Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col0 (type: string), _col1 (type: string)
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: key (type: string), value (type: string)
                     outputColumnNames: _col0, _col1
diff --git ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out
index e054664..8aa82d2 100644
--- ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out
+++ ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out
@@ -1614,11 +1614,11 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
-        Reducer 3 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
+        Reducer 2 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1)
+        Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1)
 #### A masked pattern was here ####
       Vertices:
-        Map 1 
+        Map 4 
             Map Operator Tree:
                 TableScan
                   alias: src
@@ -1638,6 +1638,11 @@ STAGE PLANS:
                     sort order: ++
                     Map-reduce partition columns: _col0 (type: string)
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: value (type: string), key (type: string)
                     outputColumnNames: value, key
diff --git ql/src/test/results/clientpositive/spark/multi_insert_lateral_view.q.out ql/src/test/results/clientpositive/spark/multi_insert_lateral_view.q.out
index ef0d3ef..d635afe 100644
--- ql/src/test/results/clientpositive/spark/multi_insert_lateral_view.q.out
+++ ql/src/test/results/clientpositive/spark/multi_insert_lateral_view.q.out
@@ -296,11 +296,11 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 1 (GROUP, 1)
-        Reducer 3 <- Map 1 (GROUP, 1)
+        Reducer 2 <- Map 4 (GROUP, 1)
+        Reducer 3 <- Map 5 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
-        Map 1 
+        Map 4 
             Map Operator Tree:
                 TableScan
                   alias: src_10
@@ -356,6 +356,11 @@ STAGE PLANS:
                     Map-reduce partition columns: _col0 (type: string)
                     Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col1 (type: double)
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: src_10
+                  Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
                   Lateral View Forward
                     Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
@@ -567,11 +572,11 @@ STAGE PLANS:
   Stage: Stage-3
     Spark
       Edges:
-        Reducer 2 <- Map 1 (GROUP, 1)
-        Reducer 3 <- Map 1 (SORT, 1)
+        Reducer 2 <- Map 4 (GROUP, 1)
+        Reducer 3 <- Map 5 (SORT, 1)
 #### A masked pattern was here ####
       Vertices:
-        Map 1 
+        Map 4 
             Map Operator Tree:
                 TableScan
                   alias: src_10
@@ -627,6 +632,11 @@ STAGE PLANS:
                     Map-reduce partition columns: _col0 (type: string)
                     Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col1 (type: double)
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: src_10
+                  Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: ((key > 200) or (key < 200)) (type: boolean)
                     Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE
@@ -857,12 +867,12 @@ STAGE PLANS:
   Stage: Stage-3
     Spark
       Edges:
-        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
-        Reducer 3 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
-        Reducer 4 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
+        Reducer 2 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1)
+        Reducer 3 <- Map 6 (GROUP PARTITION-LEVEL SORT, 1)
+        Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 1)
 #### A masked pattern was here ####
       Vertices:
-        Map 1 
+        Map 5 
             Map Operator Tree:
                 TableScan
                   alias: src_10
@@ -916,6 +926,11 @@ STAGE PLANS:
                     sort order: ++
                     Map-reduce partition columns: _col0 (type: double)
                     Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE
+        Map 6 
+            Map Operator Tree:
+                TableScan
+                  alias: src_10
+                  Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
                   Lateral View Forward
                     Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
@@ -965,6 +980,11 @@ STAGE PLANS:
                     sort order: ++
                     Map-reduce partition columns: _col0 (type: double)
                     Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE
+        Map 7 
+            Map Operator Tree:
+                TableScan
+                  alias: src_10
+                  Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: value (type: string), key (type: string)
                     outputColumnNames: value, key
@@ -1221,12 +1241,12 @@ STAGE PLANS:
   Stage: Stage-4
     Spark
       Edges:
-        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
-        Reducer 3 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
-        Reducer 4 <- Map 1 (PARTITION-LEVEL SORT, 1)
+        Reducer 2 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1)
+        Reducer 3 <- Map 6 (GROUP PARTITION-LEVEL SORT, 1)
+        Reducer 4 <- Map 7 (PARTITION-LEVEL SORT, 1)
 #### A masked pattern was here ####
       Vertices:
-        Map 1 
+        Map 5 
             Map Operator Tree:
                 TableScan
                   alias: src_10
@@ -1280,6 +1300,11 @@ STAGE PLANS:
                     sort order: ++
                     Map-reduce partition columns: _col0 (type: string)
                     Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE
+        Map 6 
+            Map Operator Tree:
+                TableScan
+                  alias: src_10
+                  Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
                   Lateral View Forward
                     Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
@@ -1329,6 +1354,11 @@ STAGE PLANS:
                     sort order: ++
                     Map-reduce partition columns: _col0 (type: string)
                     Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE
+        Map 7 
+            Map Operator Tree:
+                TableScan
+                  alias: src_10
+                  Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: ((key > 200) or (key < 200)) (type: boolean)
                     Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE
diff --git ql/src/test/results/clientpositive/spark/multi_insert_mixed.q.out ql/src/test/results/clientpositive/spark/multi_insert_mixed.q.out
index dbf3a00..f48439e 100644
--- ql/src/test/results/clientpositive/spark/multi_insert_mixed.q.out
+++ ql/src/test/results/clientpositive/spark/multi_insert_mixed.q.out
@@ -54,13 +54,13 @@ STAGE PLANS:
   Stage: Stage-3
     Spark
      Edges:
-        Reducer 2 <- Map 1 (GROUP, 1)
+        Reducer 2 <- Map 6 (GROUP, 1)
         Reducer 3 <- Reducer 2 (SORT, 1)
-        Reducer 4 <- Map 1 (GROUP, 1)
+        Reducer 4 <- Map 7 (GROUP, 1)
         Reducer 5 <- Reducer 4 (SORT, 1)
 #### A masked pattern was here ####
       Vertices:
-        Map 1 
+        Map 6 
             Map Operator Tree:
                 TableScan
                   alias: src
@@ -81,6 +81,26 @@ STAGE PLANS:
                     Map-reduce partition columns: _col0 (type: string)
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col1 (type: bigint)
+                  Filter Operator
+                    predicate: (key < 10) (type: boolean)
+                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: string), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                            name: default.src_multi3
+        Map 7 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: value (type: string)
                     outputColumnNames: value
@@ -97,21 +117,6 @@ STAGE PLANS:
                     Map-reduce partition columns: _col0 (type: string)
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col1 (type: bigint)
-                  Filter Operator
-                    predicate: (key < 10) (type: boolean)
-                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: string), value (type: string)
-                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-                      File Output Operator
-                        compressed: false
-                        Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-                        table:
-                            input format: org.apache.hadoop.mapred.TextInputFormat
-                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                            name: default.src_multi3
         Reducer 2 
             Reduce Operator Tree:
               Group By Operator
diff --git ql/src/test/results/clientpositive/spark/multi_insert_move_tasks_share_dependencies.q.out ql/src/test/results/clientpositive/spark/multi_insert_move_tasks_share_dependencies.q.out
index db158eb..022079f 100644
--- ql/src/test/results/clientpositive/spark/multi_insert_move_tasks_share_dependencies.q.out
+++ ql/src/test/results/clientpositive/spark/multi_insert_move_tasks_share_dependencies.q.out
@@ -2417,9 +2417,10 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
      Edges:
-        Reducer 2 <- Map 1 (SORT, 1)
-        Reducer 3 <- Reducer 2 (SORT, 1)
-        Reducer 4 <- Reducer 2 (SORT, 1)
+        Reducer 3 <- Reducer 5 (SORT, 1)
+        Reducer 4 <- Reducer 6 (SORT, 1)
+        Reducer 5 <- Map 1 (SORT, 1)
+        Reducer 6 <- Map 1 (SORT, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -2439,7 +2440,33 @@ STAGE PLANS:
                     sort order: ++
                     Map-reduce partition columns: key (type: string), value (type: string)
                     Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
-        Reducer 2 
+        Reducer 3 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 5 
             Reduce Operator Tree:
               Forward
                 Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
@@ -2461,6 +2488,10 @@ STAGE PLANS:
                       Map-reduce partition columns: _col0 (type: string)
                       Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col1 (type: string)
+        Reducer 6 
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
                 Filter Operator
                   predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean)
                   Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE
@@ -2479,32 +2510,6 @@ STAGE PLANS:
                       Map-reduce partition columns: _col1 (type: string)
                       Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col0 (type: string)
-        Reducer 3 
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 4 
-            Reduce Operator Tree:
-              Select Operator
-                expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-3
     Dependency Collection
@@ -2551,9 +2556,10 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
      Edges:
-        Reducer 2 <- Map 1 (SORT, 1)
-        Reducer 3 <- Reducer 2 (SORT, 1)
-        Reducer 4 <- Reducer 2 (SORT, 1)
+        Reducer 3 <- Reducer 5 (SORT, 1)
+        Reducer 4 <- Reducer 6 (SORT, 1)
+        Reducer 5 <- Map 1 (SORT, 1)
+        Reducer 6 <- Map 1 (SORT, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -2573,7 +2579,33 @@ STAGE PLANS:
                     sort order: ++
                     Map-reduce partition columns: key (type: string), value (type: string)
                     Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
-        Reducer 2 
+        Reducer 3 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 5 
             Reduce Operator Tree:
               Forward
                 Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
@@ -2595,6 +2627,10 @@ STAGE PLANS:
                       Map-reduce partition columns: _col0 (type: string)
                       Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col1 (type: string)
+        Reducer 6 
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
                 Filter Operator
                   predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean)
                   Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE
@@ -2613,32 +2649,6 @@ STAGE PLANS:
                       Map-reduce partition columns: _col1 (type: string)
                      Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col0 (type: string)
-        Reducer 3 
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 4 
-            Reduce Operator Tree:
-              Select Operator
-                expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-3
     Dependency Collection
@@ -2685,9 +2695,10 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
      Edges:
-        Reducer 2 <- Map 1 (SORT, 1)
-        Reducer 3 <- Reducer 2 (SORT, 1)
-        Reducer 4 <- Reducer 2 (SORT, 1)
+        Reducer 3 <- Reducer 5 (SORT, 1)
+        Reducer 4 <- Reducer 6 (SORT, 1)
+        Reducer 5 <- Map 1 (SORT, 1)
+        Reducer 6 <- Map 1 (SORT, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -2707,7 +2718,33 @@ STAGE PLANS:
                     sort order: ++
                     Map-reduce partition columns: key (type: string), value (type: string)
                     Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
-        Reducer 2 
+        Reducer 3 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 5 
             Reduce Operator Tree:
               Forward
                 Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
@@ -2729,6 +2766,10 @@ STAGE PLANS:
                       Map-reduce partition columns: _col0 (type: string)
                       Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col1 (type: string)
+        Reducer 6 
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
                 Filter Operator
                   predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean)
                   Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE
@@ -2747,32 +2788,6 @@ STAGE PLANS:
                       Map-reduce partition columns: _col1 (type: string)
                       Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col0 (type: string)
-        Reducer 3 
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 4 
-            Reduce Operator Tree:
-              Select Operator
-                expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-3
     Dependency Collection
@@ -2819,9 +2834,10 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
      Edges:
-        Reducer 2 <- Map 1 (SORT, 1)
-        Reducer 3 <- Reducer 2 (SORT, 1)
-        Reducer 4 <- Reducer 2 (SORT, 1)
+        Reducer 3 <- Reducer 5 (SORT, 1)
+        Reducer 4 <- Reducer 6 (SORT, 1)
+        Reducer 5 <- Map 1 (SORT, 1)
+        Reducer 6 <- Map 1 (SORT, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -2841,7 +2857,33 @@ STAGE PLANS:
                     sort order: ++
                     Map-reduce partition columns: key (type: string), value (type: string)
                     Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
-        Reducer 2 
+        Reducer 3 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 5 
             Reduce Operator Tree:
               Forward
                 Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
@@ -2863,6 +2905,10 @@ STAGE PLANS:
                       Map-reduce partition columns: _col0 (type: string)
                       Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col1 (type: string)
+        Reducer 6 
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
                 Filter Operator
                   predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean)
                   Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE
@@ -2881,32 +2927,6 @@ STAGE PLANS:
                       Map-reduce partition columns: _col1 (type: string)
                       Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col0 (type: string)
-        Reducer 3 
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 4 
-            Reduce Operator Tree:
-              Select Operator
-                expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-3
     Dependency Collection
@@ -2961,9 +2981,10 @@ STAGE PLANS:
   Stage: Stage-4
     Spark
      Edges:
-        Reducer 2 <- Map 1 (SORT, 1)
-        Reducer 3 <- Reducer 2 (SORT, 1)
-        Reducer 4 <- Reducer 2 (SORT, 1)
+        Reducer 3 <- Reducer 5 (SORT, 1)
+        Reducer 4 <- Reducer 6 (SORT, 1)
+        Reducer 5 <- Map 1 (SORT, 1)
+        Reducer 6 <- Map 1 (SORT, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -3013,7 +3034,33 @@ STAGE PLANS:
                     sort order: ++
                     Map-reduce partition columns: key (type: string), value (type: string)
                     Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
-        Reducer 2 
+        Reducer 3 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 5 
             Reduce Operator Tree:
               Forward
                 Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
@@ -3035,6 +3082,10 @@ STAGE PLANS:
                       Map-reduce partition columns: _col0 (type: string)
                       Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col1 (type: string)
+        Reducer 6 
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
                 Filter Operator
                   predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean)
                   Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE
@@ -3053,32 +3104,6 @@ STAGE PLANS:
                       Map-reduce partition columns: _col1 (type: string)
                       Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col0 (type: string)
-        Reducer 3 
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 4 
-            Reduce Operator Tree:
-              Select Operator
-                expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-5
     Dependency Collection
@@ -3206,9 +3231,10 @@ STAGE PLANS:
   Stage: Stage-4
     Spark
      Edges:
-        Reducer 2 <- Map 1 (SORT, 1)
-        Reducer 3 <- Reducer 2 (SORT, 1)
-        Reducer 4 <- Reducer 2 (SORT, 1)
+        Reducer 3 <- Reducer 5 (SORT, 1)
+        Reducer 4 <- Reducer 6 (SORT, 1)
+        Reducer 5 <- Map 1 (SORT, 1)
+        Reducer 6 <- Map 1 (SORT, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -3258,7 +3284,33 @@ STAGE PLANS:
                     sort order: ++
                     Map-reduce partition columns: key (type: string), value (type: string)
                     Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
-        Reducer 2 
+        Reducer 3 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 5 
             Reduce Operator Tree:
               Forward
                 Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
@@ -3280,6 +3332,10 @@ STAGE PLANS:
                       Map-reduce partition columns: _col0 (type: string)
                       Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col1 (type: string)
+        Reducer 6 
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
                 Filter Operator
                   predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean)
                   Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE
@@ -3298,32 +3354,6 @@ STAGE PLANS:
                       Map-reduce partition columns: _col1 (type: string)
                       Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col0 (type: string)
-        Reducer 3 
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 4 
-            Reduce Operator Tree:
-              Select Operator
-                expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-5
     Dependency Collection
@@ -3451,9 +3481,10 @@ STAGE PLANS:
   Stage: Stage-4
     Spark
      Edges:
-        Reducer 2 <- Map 1 (SORT, 1)
-        Reducer 3 <- Reducer 2 (SORT, 1)
-        Reducer 4 <- Reducer 2 (SORT, 1)
+        Reducer 3 <- Reducer 5 (SORT, 1)
+        Reducer 4 <- Reducer 6 (SORT, 1)
+        Reducer 5 <- Map 1 (SORT, 1)
+        Reducer 6 <- Map 1 (SORT, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -3503,7 +3534,33 @@ STAGE PLANS:
                     sort order: ++
                     Map-reduce partition columns: key (type: string), value (type: string)
                     Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
-        Reducer 2 
+        Reducer 3 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
+                outputColumnNames: _col0, _col1
outputColumnNames: _col0, _col1 + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE @@ -3525,6 +3582,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -3543,32 +3604,6 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 3 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-5 Dependency Collection @@ -3696,9 +3731,10 @@ STAGE PLANS: Stage: Stage-4 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) - Reducer 4 <- Reducer 2 (SORT, 1) + Reducer 3 <- Reducer 5 (SORT, 1) + Reducer 4 <- Reducer 6 (SORT, 1) + Reducer 5 <- Map 1 (SORT, 1) + Reducer 6 <- Map 1 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3748,7 +3784,33 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 
2347 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE @@ -3770,6 +3832,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -3788,32 +3854,6 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 3 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-5 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/multigroupby_singlemr.q.out ql/src/test/results/clientpositive/spark/multigroupby_singlemr.q.out index 475f624..1a14ff4 100644 --- ql/src/test/results/clientpositive/spark/multigroupby_singlemr.q.out +++ 
ql/src/test/results/clientpositive/spark/multigroupby_singlemr.q.out @@ -60,11 +60,11 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 4 (GROUP, 1) + Reducer 3 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: tbl @@ -85,6 +85,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: bigint) + Map 5 + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: c1 (type: int), c2 (type: int), c3 (type: int) outputColumnNames: c1, c2, c3 @@ -193,11 +198,11 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 4 (GROUP, 1) + Reducer 3 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: tbl @@ -218,6 +223,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: bigint) + Map 5 + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: c2 (type: int), c1 (type: int), c3 (type: int) outputColumnNames: c2, c1, c3 @@ -326,11 +336,11 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 4 (GROUP, 1) + Reducer 3 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: tbl @@ -351,6 +361,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col3 (type: bigint) + Map 5 + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: c1 (type: int), c2 (type: int), c3 (type: int) outputColumnNames: c1, c2, c3 @@ -573,12 +588,12 @@ STAGE PLANS: Stage: Stage-3 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Map 1 (GROUP, 1) - Reducer 4 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 5 (GROUP, 1) + Reducer 3 <- Map 6 (GROUP, 1) + Reducer 4 <- Map 7 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 5 Map Operator Tree: TableScan alias: tbl @@ -599,6 +614,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col3 (type: bigint) + Map 6 + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: c1 (type: int), c2 (type: int), c3 (type: int) outputColumnNames: c1, c2, c3 @@ -615,6 +635,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col2 (type: bigint) + Map 7 + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: c1 (type: int), c2 (type: int) outputColumnNames: c1, c2 diff --git 
ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out index 85939c0..c5f68c6 100644 --- ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out +++ ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out @@ -68,14 +68,14 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 9 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 10 (PARTITION-LEVEL SORT, 1), Reducer 9 (PARTITION-LEVEL SORT, 1) Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Reducer 3 (SORT, 1) - Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1) + Reducer 5 <- Map 11 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1) Reducer 9 <- Map 8 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 10 Map Operator Tree: TableScan alias: b @@ -84,6 +84,11 @@ STAGE PLANS: sort order: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: string), value (type: string) + Map 11 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string), value (type: string) sort order: ++ @@ -501,14 +506,14 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 9 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 10 (PARTITION-LEVEL SORT, 1), Reducer 9 (PARTITION-LEVEL SORT, 1) Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Reducer 3 (SORT, 1) - Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1) + Reducer 5 <- Map 11 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1) Reducer 9 <- Map 8 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 10 Map Operator Tree: TableScan alias: b @@ -517,6 +522,11 @@ STAGE PLANS: sort order: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: string), value (type: string) + Map 11 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string), value (type: string) sort order: ++ diff --git ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out index 3f069ab..5968af6 100644 --- ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out @@ -5202,10 +5202,11 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 6 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 1) - Reducer 5 <- Reducer 2 (PARTITION-LEVEL SORT, 1) + Reducer 5 <- Reducer 7 (PARTITION-LEVEL SORT, 1) + Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 1) + Reducer 7 <- Map 1 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -5271,33 +5272,6 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] - Reducer 2 - Needs Tagging: false - Reduce Operator Tree: - Extract - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Statistics: Num rows: 26 Data size: 
16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col5 (type: int) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - auto parallelism: false - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - auto parallelism: false Reducer 3 Needs Tagging: false Reduce Operator Tree: @@ -5387,6 +5361,40 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Reducer 6 + Needs Tagging: false + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col5 (type: int) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + auto parallelism: false + Reducer 7 + Needs Tagging: false + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + auto parallelism: false Stage: Stage-3 Dependency Collection