diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 558dd02..3ae929f 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -537,6 +537,7 @@ spark.query.files=add_part_multiple.q \ scriptfile1.q \ sort.q \ spark_test.q \ + spark_multi_insert_split_work.q \ subquery_multiinsert.q \ temp_table.q \ timestamp_1.q \ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java index 7d9feac..ceb4056 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java @@ -972,6 +972,24 @@ public static MapredWork clonePlan(MapredWork plan) { } /** + * Clones using the powers of XML. Do not use unless necessary. + * @param plan The plan. + * @return The clone. + */ + public static BaseWork cloneBaseWork(BaseWork plan) { + // TODO: same as above + PerfLogger perfLogger = PerfLogger.getPerfLogger(); + perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.CLONE_PLAN); + ByteArrayOutputStream baos = new ByteArrayOutputStream(4096); + Configuration conf = new HiveConf(); + serializePlan(plan, baos, conf, true); + BaseWork newPlan = deserializePlan(new ByteArrayInputStream(baos.toByteArray()), + plan.getClass(), conf, true); + perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.CLONE_PLAN); + return newPlan; + } + + /** * Serialize the object. This helper function mainly makes sure that enums, * counters, etc are handled properly. */ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveReduceFunction.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveReduceFunction.java index 5153885..02ecc92 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveReduceFunction.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveReduceFunction.java @@ -20,13 +20,9 @@ import java.util.Iterator; -import org.apache.hadoop.hive.ql.exec.mr.ExecReducer; import org.apache.hadoop.hive.ql.io.HiveKey; import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.Reporter; -import org.apache.spark.TaskContext; -import org.apache.spark.api.java.function.PairFlatMapFunction; import scala.Tuple2; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/MapInput.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/MapInput.java index 3fd37a0..1445e0b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/MapInput.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/MapInput.java @@ -19,10 +19,13 @@ package org.apache.hadoop.hive.ql.exec.spark; import org.apache.hadoop.hive.ql.io.HiveKey; +import org.apache.hadoop.io.BinaryComparable; import org.apache.hadoop.io.BytesWritable; import org.apache.spark.api.java.JavaPairRDD; import com.google.common.base.Preconditions; +import org.apache.spark.api.java.function.PairFunction; +import scala.Tuple2; public class MapInput implements SparkTran { private JavaPairRDD hadoopRDD; @@ -46,7 +49,30 @@ public void setToCache(boolean toCache) { JavaPairRDD input) { Preconditions.checkArgument(input == null, "AssertionError: MapInput doesn't take any input"); - return toCache ? 
hadoopRDD.cache() : hadoopRDD; + JavaPairRDD result = hadoopRDD; + if (toCache) { + result = result.mapToPair(new CopyFunction()); + return result.cache(); + } else { + return result; + } + } + + private static class CopyFunction implements PairFunction, + BytesWritable, BytesWritable> { + + private BytesWritable copyBytesWritable(BytesWritable bc) { + BytesWritable bw = new BytesWritable(); + bw.set(bc.getBytes(), 0, bc.getLength()); + return bw; + } + + @Override + public Tuple2 call(Tuple2 tup) throws Exception { + // no need to copy key since it never gets used in HiveMapFunction + BytesWritable value = copyBytesWritable(tup._2()); + return new Tuple2(tup._1(), value); + } } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java index 126cb9f..dd5c3ba 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java @@ -18,15 +18,13 @@ package org.apache.hadoop.hive.ql.exec.spark; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - import com.google.common.base.Preconditions; import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; import org.apache.hadoop.hive.ql.io.merge.MergeFileMapper; import org.apache.hadoop.hive.ql.io.merge.MergeFileOutputFormat; import org.apache.hadoop.hive.ql.io.merge.MergeFileWork; @@ -46,7 +44,6 @@ import org.apache.hadoop.hive.ql.plan.ReduceWork; import org.apache.hadoop.hive.ql.plan.SparkEdgeProperty; import org.apache.hadoop.hive.ql.plan.SparkWork; -import org.apache.hadoop.hive.ql.plan.UnionWork; import org.apache.hadoop.hive.ql.stats.StatsFactory; import org.apache.hadoop.hive.ql.stats.StatsPublisher; import org.apache.hadoop.hive.shims.ShimLoader; @@ -57,6 +54,16 @@ import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaSparkContext; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Queue; +import java.util.Set; + public class SparkPlanGenerator { private static final Log LOG = LogFactory.getLog(SparkPlanGenerator.class); @@ -64,6 +71,9 @@ private final JobConf jobConf; private Context context; private Path scratchDir; + private final Map cloneToWork; + private final Map workToTranMap; + private final Map workToParentWorkTranMap; public SparkPlanGenerator(JavaSparkContext sc, Context context, JobConf jobConf, Path scratchDir) { @@ -71,31 +81,31 @@ public SparkPlanGenerator(JavaSparkContext sc, Context context, this.context = context; this.jobConf = jobConf; this.scratchDir = scratchDir; + this.cloneToWork = new HashMap(); + this.workToTranMap = new HashMap(); + this.workToParentWorkTranMap = new HashMap(); } public SparkPlan generate(SparkWork sparkWork) throws Exception { SparkPlan sparkPlan = new SparkPlan(); - Map workToTranMap = new HashMap(); + cloneToWork.clear(); + workToTranMap.clear(); + workToParentWorkTranMap.clear(); + + splitSparkWork(sparkWork); for (BaseWork work : sparkWork.getAllWork()) { SparkTran tran; if (work instanceof MapWork) { - MapInput mapInput = generateMapInput((MapWork)work); - sparkPlan.addTran(mapInput);
+ SparkTran mapInput = getParentTran(sparkPlan, sparkWork, work); tran = generate((MapWork)work); sparkPlan.addTran(tran); sparkPlan.connect(mapInput, tran); } else if (work instanceof ReduceWork) { - List parentWorks = sparkWork.getParents(work); + SparkTran shuffleTran = getParentTran(sparkPlan, sparkWork, work); tran = generate((ReduceWork)work); sparkPlan.addTran(tran); - ShuffleTran shuffleTran = generate(sparkWork.getEdgeProperty(parentWorks.get(0), work)); - sparkPlan.addTran(shuffleTran); sparkPlan.connect(shuffleTran, tran); - for (BaseWork parentWork : parentWorks) { - SparkTran parentTran = workToTranMap.get(parentWork); - sparkPlan.connect(parentTran, shuffleTran); - } } else { List parentWorks = sparkWork.getParents(work); tran = new IdentityTran(); @@ -105,12 +115,144 @@ public SparkPlan generate(SparkWork sparkWork) throws Exception { sparkPlan.connect(parentTran, tran); } } + workToTranMap.put(work, tran); } return sparkPlan; } + // Get the (possibly cached) parent SparkTran + private SparkTran getParentTran(SparkPlan sparkPlan, SparkWork sparkWork, BaseWork work) throws Exception { + if (cloneToWork.containsKey(work)) { + BaseWork originalWork = cloneToWork.get(work); + if (workToParentWorkTranMap.containsKey(originalWork)) { + return workToParentWorkTranMap.get(originalWork); + } + } + + SparkTran result; + if (work instanceof MapWork) { + result = generateMapInput((MapWork)work); + sparkPlan.addTran(result); + } else if (work instanceof ReduceWork) { + List parentWorks = sparkWork.getParents(work); + result = generate(sparkWork.getEdgeProperty(parentWorks.get(0), work), cloneToWork.containsKey(work)); + sparkPlan.addTran(result); + for (BaseWork parentWork : parentWorks) { + sparkPlan.connect(workToTranMap.get(parentWork), result); + } + } else { + throw new IllegalStateException("AssertionError: getParentTran() only expects MapWork or ReduceWork," + + " but found " + work.getClass().getName()); + } + + if (cloneToWork.containsKey(work)) { + workToParentWorkTranMap.put(cloneToWork.get(work), result); + } + + return result; + } + + + private void splitSparkWork(SparkWork sparkWork) { + // do a BFS on the sparkWork graph, and look for any work that has more than one child. + // If we find such a work, we split it into multiple ones, one for each of its children. + Queue queue = new LinkedList(); + Set visited = new HashSet(); + queue.addAll(sparkWork.getRoots()); + while (!queue.isEmpty()) { + BaseWork work = queue.poll(); + if (!visited.add(work)) { + continue; + } + + List childWorks = sparkWork.getChildren(work); + // First, add all children of this work into queue, to be processed later. + for (BaseWork w : childWorks) { + queue.add(w); + } + + // Second, check if this work has multiple reduceSinks. If so, split it. + splitBaseWork(sparkWork, work, childWorks); + } + } + + private Set> getAllReduceSinks(BaseWork work) { + Set> resultSet = work.getAllLeafOperators(); + Iterator> it = resultSet.iterator(); + while (it.hasNext()) { + if (!(it.next() instanceof ReduceSinkOperator)) { + it.remove(); + } + } + return resultSet; + } + + // Split work into multiple branches, one for each childWork in childWorks. + // It also sets up the connections between each parent work and child work. + private void splitBaseWork(SparkWork sparkWork, BaseWork parentWork, List childWorks) { + // Grand-parent works - we need to set these to be the parents of the cloned works.
+ List grandParentWorks = sparkWork.getParents(parentWork); + + if (getAllReduceSinks(parentWork).size() <= 1) { + return; + } + + boolean isFirst = true; + for (BaseWork childWork : childWorks) { + BaseWork clonedParentWork = Utilities.cloneBaseWork(parentWork); + String childReducerName = childWork.getName(); + SparkEdgeProperty clonedEdgeProperty = sparkWork.getEdgeProperty(parentWork, childWork); + + // We need to remove those branches that + // 1. end with a ReduceSinkOperator, and + // 2. have a ReduceSinkOperator whose name is not the same as childReducerName. + // Also, if the cloned work is not the first, we remove ALL leaf operators except + // the corresponding ReduceSinkOperator. + for (Operator op : clonedParentWork.getAllLeafOperators()) { + if (op instanceof ReduceSinkOperator) { + if (!((ReduceSinkOperator)op).getConf().getOutputName().equals(childReducerName)) { + removeOpRecursive(op); + } + } else if (!isFirst) { + removeOpRecursive(op); + } + } + + if (isFirst) { + isFirst = false; + } + + // Then, we need to set up the graph connections. Specifically: + // 1. we need to connect this cloned parent work with all the grand-parent works. + // 2. we need to connect this cloned parent work with the corresponding child work. + sparkWork.add(clonedParentWork); + for (BaseWork gpw : grandParentWorks) { + sparkWork.connect(gpw, clonedParentWork, sparkWork.getEdgeProperty(gpw, parentWork)); + } + sparkWork.connect(clonedParentWork, childWork, clonedEdgeProperty); + cloneToWork.put(clonedParentWork, parentWork); + } + + sparkWork.remove(parentWork); + } + + // Remove op from all its parents' child lists. + // Recursively remove any of its parents that only have this op as a child. + private void removeOpRecursive(Operator operator) { + List> parentOperators = new ArrayList>(); + for (Operator op : operator.getParentOperators()) parentOperators.add(op); + for (Operator parentOperator : parentOperators) { + Preconditions.checkArgument(parentOperator.getChildOperators().contains(operator), + "AssertionError: parent of " + operator.getName() + " doesn't have it as a child."); + parentOperator.removeChild(operator); + if (parentOperator.getNumChild() == 0) { + removeOpRecursive(parentOperator); + } + } + } + private Class getInputFormat(JobConf jobConf, MapWork mWork) throws HiveException { // MergeFileWork is sub-class of MapWork, we don't need to distinguish here if (mWork.getInputformat() != null) { @@ -147,10 +289,10 @@ private MapInput generateMapInput(MapWork mapWork) JavaPairRDD hadoopRDD = sc.hadoopRDD(jobConf, ifClass, WritableComparable.class, Writable.class); - return new MapInput(hadoopRDD); + return new MapInput(hadoopRDD, false /*TODO: fix this after resolving HIVE-8457: cloneToWork.containsKey(mapWork)*/); } - private ShuffleTran generate(SparkEdgeProperty edge) { + private ShuffleTran generate(SparkEdgeProperty edge, boolean needCache) { Preconditions.checkArgument(!edge.isShuffleNone(), "AssertionError: SHUFFLE_NONE should only be used for UnionWork."); SparkShuffler shuffler; @@ -161,7 +303,7 @@ private ShuffleTran generate(SparkEdgeProperty edge) { } else { shuffler = new GroupByShuffler(); } - return new ShuffleTran(shuffler, edge.getNumPartitions()); + return new ShuffleTran(shuffler, edge.getNumPartitions(), needCache); } private MapTran generate(MapWork mw) throws Exception { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkProcContext.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkProcContext.java index d7744e9..ed88c60 100644 ---
a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkProcContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkProcContext.java @@ -77,10 +77,6 @@ // walk. public Operator parentOfRoot; - // Default task is the task we use for those operators that are not connected - // to the newly generated TS - public SparkTask defaultTask; - // Spark task we're currently processing public SparkTask currentTask; @@ -88,20 +84,6 @@ // one. public BaseWork preceedingWork; - // All operators that we should unlink with their parents, for multi-table insertion - // It's a mapping from operator to its ONLY parent. - public Map, Operator> opToParentMap; - - // A mapping from operators to their corresponding tasks. - // The key for this map could only be: - // 1. TableScanOperators (so we know which task for the tree rooted at this TS) - // 2. FileSinkOperators (need this info in GenSparkUtils::processFileSinks) - // 3. UnionOperator/JoinOperator (need for merging tasks) - public final Map, SparkTask> opToTaskMap; - - // temporary TS generated for multi-table insertion - public final Set tempTS; - // map that keeps track of the last operator of a task to the work // that follows it. This is used for connecting them later. public final Map, BaseWork> leafOperatorToFollowingWork; @@ -157,10 +139,9 @@ public GenSparkProcContext(HiveConf conf, ParseContext parseContext, this.rootTasks = rootTasks; this.inputs = inputs; this.outputs = outputs; - this.defaultTask = (SparkTask) TaskFactory.get( + this.currentTask = (SparkTask) TaskFactory.get( new SparkWork(conf.getVar(HiveConf.ConfVars.HIVEQUERYID)), conf); - this.rootTasks.add(defaultTask); - this.currentTask = null; + this.rootTasks.add(currentTask); this.leafOperatorToFollowingWork = new LinkedHashMap, BaseWork>(); this.linkOpWithWorkMap = new LinkedHashMap, Map>(); this.linkWorkWithReduceSinkMap = new LinkedHashMap>(); @@ -178,8 +159,5 @@ public GenSparkProcContext(HiveConf conf, ParseContext parseContext, this.clonedReduceSinks = new LinkedHashSet(); this.fileSinkSet = new LinkedHashSet(); this.connectedReduceSinks = new LinkedHashSet(); - this.opToParentMap = new LinkedHashMap, Operator>(); - this.opToTaskMap = new LinkedHashMap, SparkTask>(); - this.tempTS = new LinkedHashSet(); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java index 280edde..8e28887 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java @@ -170,16 +170,6 @@ public MapWork createMapWork(GenSparkProcContext context, Operator root, return mapWork; } - // Create a MapWork for a temporary TableScanOperator - // Basically a thin wrapper on GenMapRedUtils.setTaskPlan. 
- public MapWork createMapWork(TableScanOperator root, - SparkWork sparkWork, String path, TableDesc tt_desc) throws SemanticException { - MapWork mapWork = new MapWork("Map " + (++sequenceNumber)); - GenMapRedUtils.setTaskPlan(path, path, root, mapWork, false, tt_desc); - sparkWork.add(mapWork); - return mapWork; - } - // this method's main use is to help unit testing this class protected void setupMapWork(MapWork mapWork, GenSparkProcContext context, PrunedPartitionList partitions, Operator root, @@ -274,19 +264,15 @@ public void processFileSink(GenSparkProcContext context, FileSinkOperator fileSi throws SemanticException { ParseContext parseContext = context.parseContext; - Preconditions.checkArgument(context.opToTaskMap.containsKey(fileSink), - "AssertionError: the fileSink " + fileSink.getName() + " should be in the context"); - - SparkTask currentTask = context.opToTaskMap.get(fileSink); boolean isInsertTable = // is INSERT OVERWRITE TABLE GenMapRedUtils.isInsertInto(parseContext, fileSink); HiveConf hconf = parseContext.getConf(); boolean chDir = GenMapRedUtils.isMergeRequired(context.moveTask, - hconf, fileSink, currentTask, isInsertTable); + hconf, fileSink, context.currentTask, isInsertTable); - Path finalName = GenMapRedUtils.createMoveTask(currentTask, + Path finalName = GenMapRedUtils.createMoveTask(context.currentTask, chDir, fileSink, parseContext, context.moveTask, hconf, context.dependencyTask); if (chDir) { @@ -295,13 +281,13 @@ public void processFileSink(GenSparkProcContext context, FileSinkOperator fileSi logger.info("using CombineHiveInputformat for the merge job"); GenMapRedUtils.createMRWorkForMergingFiles(fileSink, finalName, context.dependencyTask, context.moveTask, - hconf, currentTask); + hconf, context.currentTask); } FetchTask fetchTask = parseContext.getFetchTask(); - if (fetchTask != null && currentTask.getNumChild() == 0) { + if (fetchTask != null && context.currentTask.getNumChild() == 0) { if (fetchTask.isFetchFrom(fileSink.getConf())) { - currentTask.setFetchSource(true); + context.currentTask.setFetchSource(true); } } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkWork.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkWork.java index ac94ea0..4f5feca 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkWork.java @@ -90,10 +90,6 @@ public Object process(Node nd, Stack stack, return null; } - if (operator instanceof FileSinkOperator) { - context.opToTaskMap.put(operator, context.currentTask); - } - SparkWork sparkWork = context.currentTask.getWork(); // Right now the work graph is pretty simple. 
If there is no diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java index 644c681..1c663c4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java @@ -34,7 +34,6 @@ import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.exec.ConditionalTask; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; -import org.apache.hadoop.hive.ql.exec.JoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; import org.apache.hadoop.hive.ql.exec.TableScanOperator; @@ -139,39 +138,9 @@ protected void generateTaskTree(List> rootTasks, Pa GenSparkProcContext procCtx = new GenSparkProcContext( conf, tempParseContext, mvTask, rootTasks, inputs, outputs); - // -------------------- First Pass --------------------- - - Map opRules = new LinkedHashMap(); - opRules.put(new RuleRegExp("TS", TableScanOperator.getOperatorName() + "%"), - new SparkTableScanProcessor()); - - Dispatcher disp = new DefaultRuleDispatcher(new SparkMultiInsertionProcessor(), opRules, procCtx); - ArrayList topNodes = new ArrayList(); - topNodes.addAll(pCtx.getTopOps().values()); - GraphWalker ogw = new GenSparkWorkWalker(disp, procCtx); - ogw.startWalking(topNodes, null); - - // ------------------- Second Pass ---------------------- - - // Merge tasks upon Join/Union if possible - opRules.clear(); - opRules.put(new RuleRegExp("Join", JoinOperator.getOperatorName() + "%"), - new SparkMergeTaskProcessor()); - opRules.put(new RuleRegExp("Union", UnionOperator.getOperatorName() + "%"), - new SparkMergeTaskProcessor()); - disp = new DefaultRuleDispatcher(null, opRules, procCtx); - topNodes = new ArrayList(); - topNodes.addAll(procCtx.tempTS); // First process temp TS - topNodes.addAll(pCtx.getTopOps().values()); - ogw = new GenSparkWorkWalker(disp, procCtx); - ogw.startWalking(topNodes, null); - - - // ------------------- Third Pass ----------------------- - // create a walker which walks the tree in a DFS manner while maintaining // the operator stack. The dispatcher generates the plan from the operator tree - opRules.clear(); + Map opRules = new LinkedHashMap(); opRules.put(new RuleRegExp("Split Work - ReduceSink", ReduceSinkOperator.getOperatorName() + "%"), genSparkWork); @@ -184,17 +153,7 @@ protected void generateTaskTree(List> rootTasks, Pa opRules.put(new RuleRegExp("Handle Analyze Command", TableScanOperator.getOperatorName() + "%"), - new CompositeProcessor( - new NodeProcessor() { - @Override - public Object process(Node nd, Stack s, - NodeProcessorCtx procCtx, Object... 
no) throws SemanticException { - GenSparkProcContext context = (GenSparkProcContext) procCtx; - context.currentTask = context.opToTaskMap.get(nd); - return null; - } - }, - new SparkProcessAnalyzeTable(GenSparkUtils.getUtils()))); + new SparkProcessAnalyzeTable(GenSparkUtils.getUtils())); opRules.put(new RuleRegExp("Remember union", UnionOperator.getOperatorName() + "%"), new NodeProcessor() { @@ -213,11 +172,10 @@ public Object process(Node n, Stack s, // The dispatcher fires the processor corresponding to the closest matching // rule and passes the context along - disp = new DefaultRuleDispatcher(null, opRules, procCtx); - topNodes = new ArrayList(); + Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx); + List topNodes = new ArrayList(); topNodes.addAll(pCtx.getTopOps().values()); - topNodes.addAll(procCtx.tempTS); - ogw = new GenSparkWorkWalker(disp, procCtx); + GraphWalker ogw = new GenSparkWorkWalker(disp, procCtx); ogw.startWalking(topNodes, null); // we need to clone some operator plans and remove union operators still diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkMergeTaskProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkMergeTaskProcessor.java deleted file mode 100644 index 1d01040..0000000 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkMergeTaskProcessor.java +++ /dev/null @@ -1,54 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.parse.spark; - -import org.apache.hadoop.hive.ql.exec.Operator; -import org.apache.hadoop.hive.ql.exec.spark.SparkTask; -import org.apache.hadoop.hive.ql.lib.Node; -import org.apache.hadoop.hive.ql.lib.NodeProcessor; -import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; -import org.apache.hadoop.hive.ql.parse.SemanticException; - -import java.util.Map; -import java.util.Stack; - -public class SparkMergeTaskProcessor implements NodeProcessor { - - @Override - public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { - GenSparkProcContext context = (GenSparkProcContext) procCtx; - Operator op = (Operator) nd; - Map, SparkTask> opTable = context.opToTaskMap; - SparkTask currentTask = opTable.get(context.currentRootOperator); - if (!opTable.containsKey(op)) { - opTable.put(op, currentTask); - } else { - // If this op has already been visited, since we visit temporary TS first, - // also with the assumption that two paths from two different tembporary TS will NOT - // meet, the current task must be the default task. - // TODO: better we can prove that they'll never meet. 
- SparkTask existingTask = opTable.get(op); - if (currentTask == context.defaultTask && existingTask != context.defaultTask) { - opTable.put(context.currentRootOperator, existingTask); - } - } - - return null; - } -} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkMultiInsertionProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkMultiInsertionProcessor.java deleted file mode 100644 index 93940bc..0000000 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkMultiInsertionProcessor.java +++ /dev/null @@ -1,149 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.parse.spark; - -import com.google.common.base.Preconditions; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.exec.FileSinkOperator; -import org.apache.hadoop.hive.ql.exec.Operator; -import org.apache.hadoop.hive.ql.exec.OperatorFactory; -import org.apache.hadoop.hive.ql.exec.TableScanOperator; -import org.apache.hadoop.hive.ql.exec.TaskFactory; -import org.apache.hadoop.hive.ql.exec.Utilities; -import org.apache.hadoop.hive.ql.exec.spark.SparkTask; -import org.apache.hadoop.hive.ql.lib.Node; -import org.apache.hadoop.hive.ql.lib.NodeProcessor; -import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; -import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils; -import org.apache.hadoop.hive.ql.parse.ParseContext; -import org.apache.hadoop.hive.ql.parse.RowResolver; -import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.plan.FileSinkDesc; -import org.apache.hadoop.hive.ql.plan.MapWork; -import org.apache.hadoop.hive.ql.plan.OperatorDesc; -import org.apache.hadoop.hive.ql.plan.PlanUtils; -import org.apache.hadoop.hive.ql.plan.SparkWork; -import org.apache.hadoop.hive.ql.plan.TableDesc; - -import java.util.HashSet; -import java.util.Set; -import java.util.Stack; - - -public class SparkMultiInsertionProcessor implements NodeProcessor { - - private Set> processed = new HashSet>(); - - @Override - public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, - Object... nodeOutputs) throws SemanticException { - Operator op = (Operator) nd; - GenSparkProcContext context = (GenSparkProcContext) procCtx; - - if (context.opToParentMap.containsKey(op)) { - splitPlan(op, context); - context.opToParentMap.remove(op); - } - - return null; - } - - /** - * Split two tasks by creating a temporary file between them. 
- * - * @param op the select operator encountered - * @param context processing context - */ - @SuppressWarnings("nls") - private void splitPlan(Operator op, GenSparkProcContext context) - throws SemanticException { - Preconditions.checkArgument(op.getNumParent() == 1, - "AssertionError: expecting operator " + op + " to have only one parent," + - " but found multiple parents : " + op.getParentOperators()); - // nested multi-insertion shouldn't happen. - SparkTask parentTask = context.defaultTask; - SparkTask childTask = (SparkTask) TaskFactory.get( - new SparkWork(context.conf.getVar(HiveConf.ConfVars.HIVEQUERYID)), context.conf); - - GenSparkUtils utils = GenSparkUtils.getUtils(); - ParseContext parseCtx = context.parseContext; - parentTask.addDependentTask(childTask); - - // Generate the temporary file name - Operator parent = context.opToParentMap.get(op); - - Path taskTmpDir; - TableDesc tt_desc; - - if (processed.add(parent)) { - taskTmpDir = parseCtx.getContext().getMRTmpPath(); - tt_desc = PlanUtils.getIntermediateFileTableDesc(PlanUtils - .getFieldSchemasFromRowSchema(parent.getSchema(), "temporarycol")); - createTempFS(parent, taskTmpDir, tt_desc, parseCtx); - } else { - FileSinkOperator fs = (FileSinkOperator) parent.getChildOperators().get(0); - tt_desc = fs.getConf().getTableInfo(); - taskTmpDir = fs.getConf().getDirName(); - } - - TableScanOperator tableScan = createTempTS(parent, op, parseCtx); - String streamDesc = taskTmpDir.toUri().toString(); - context.opToTaskMap.put(tableScan, childTask); - context.tempTS.add(tableScan); - MapWork mapWork = utils.createMapWork(tableScan, childTask.getWork(), streamDesc, tt_desc); - context.rootToWorkMap.put(tableScan, mapWork); - } - - private void createTempFS(Operator parent, - Path taskTmpDir, TableDesc tt_desc, ParseContext parseCtx) { - boolean compressIntermediate = - parseCtx.getConf().getBoolVar(HiveConf.ConfVars.COMPRESSINTERMEDIATE); - FileSinkDesc desc = new FileSinkDesc(taskTmpDir, tt_desc, compressIntermediate); - if (compressIntermediate) { - desc.setCompressCodec(parseCtx.getConf().getVar( - HiveConf.ConfVars.COMPRESSINTERMEDIATECODEC)); - desc.setCompressType(parseCtx.getConf().getVar( - HiveConf.ConfVars.COMPRESSINTERMEDIATETYPE)); - } - Operator fileSinkOp = GenMapRedUtils.putOpInsertMap(OperatorFactory - .get(desc, parent.getSchema()), null, parseCtx); - - // Connect parent to fileSinkOp - parent.setChildOperators(Utilities.makeList(fileSinkOp)); - fileSinkOp.setParentOperators(Utilities.makeList(parent)); - } - - private TableScanOperator createTempTS(Operator parent, - Operator child, - ParseContext parseCtx) { - // Create a dummy TableScanOperator for the file generated through fileSinkOp - RowResolver parentRowResolver = - parseCtx.getOpParseCtx().get(parent).getRowResolver(); - TableScanOperator tableScanOp = (TableScanOperator) GenMapRedUtils.putOpInsertMap( - GenMapRedUtils.createTemporaryTableScanOperator(parent.getSchema()), - parentRowResolver, parseCtx); - - tableScanOp.setChildOperators(Utilities.makeList(child)); - child.setParentOperators(Utilities.makeList(tableScanOp)); - - return tableScanOp; - } - -} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkProcessAnalyzeTable.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkProcessAnalyzeTable.java index 20eb344..5a68990 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkProcessAnalyzeTable.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkProcessAnalyzeTable.java @@ -79,10 +79,6 @@ public 
Object process(Node nd, Stack stack, GenSparkProcContext context = (GenSparkProcContext) procContext; TableScanOperator tableScan = (TableScanOperator) nd; - // If this tableScan is a generated one for multi-insertion, ignore it - if (context.tempTS.contains(tableScan)) { - return null; - } ParseContext parseContext = context.parseContext; Class inputFormat = parseContext.getTopToTable().get(tableScan) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkTableScanProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkTableScanProcessor.java deleted file mode 100644 index a62643a..0000000 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkTableScanProcessor.java +++ /dev/null @@ -1,154 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.parse.spark; - -import com.clearspring.analytics.util.Preconditions; -import org.apache.hadoop.hive.ql.exec.FileSinkOperator; -import org.apache.hadoop.hive.ql.exec.ForwardOperator; -import org.apache.hadoop.hive.ql.exec.Operator; -import org.apache.hadoop.hive.ql.exec.TableScanOperator; -import org.apache.hadoop.hive.ql.lib.Node; -import org.apache.hadoop.hive.ql.lib.NodeProcessor; -import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; -import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.plan.OperatorDesc; - -import java.util.HashMap; -import java.util.LinkedList; -import java.util.Map; -import java.util.Queue; -import java.util.Stack; - -public class SparkTableScanProcessor implements NodeProcessor { - - @Override - public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, - Object... nodeOutputs) throws SemanticException { - GenSparkProcContext context = (GenSparkProcContext) procCtx; - TableScanOperator tblScan = (TableScanOperator) nd; - - context.opToTaskMap.put(tblScan, context.defaultTask); - - // For multi-table insertion, we first look for potential multiple FSs that can be reached - // from this TS. In the process of searching, we also record the path to each of these FS. - // Then, we find the LCA for these FSs. - // - // That is, in scenarios like the following: - // - // OP 1 (TS, UNION, etc) - // / \ - // OP 2 OP 3 - // - // If we find such an operator, we record all of its children to context, and unlink - // them with this operator later, in SparkMultiInsertionProcessor, and it will be become: - // - // OP 1 (TS, UNION, FOR, etc) - // | - // FS - // - // TS TS - // | | - // OP 2 OP 3 - // - // where the two branches starting with TS are in different Spark tasks. 
- // - // Because of the restrictions on multi-insertion queries, there could only be two - // categories of TS here: one through which we can reach multiple FSs, and one through - // which we can only reach one FS. For all TS in the first category, they should only - // be able to reach the same set of FS. - // A further conclusion is, there should only be one LCA for the entire operator tree. - // - // N.B.: one special case is when OP is ForwardOperator, in which case we shouldn't break - // the tree since it's already optimized. - Map>> fsToPath - = new HashMap>>(); - Queue>> paths = - new LinkedList>>(); - Stack> p = new Stack>(); - p.push(tblScan); - paths.offer(p); - - while (!paths.isEmpty()) { - Stack> currPath = paths.poll(); - Operator currOp = currPath.peek(); - if (currOp instanceof FileSinkOperator) { - FileSinkOperator fsOp = (FileSinkOperator) currOp; - // In case there are multiple paths lead to this FS, we keep the shortest one. - // (We could also keep the longest one - it doesn't matter) - if (!fsToPath.containsKey(fsOp) || currPath.size() < fsToPath.get(fsOp).size()) { - fsToPath.put(fsOp, currPath); - } - } - - for (Operator nextOp : currOp.getChildOperators()) { - Stack> nextPath = new Stack>(); - nextPath.addAll(currPath); - nextPath.push(nextOp); - paths.offer(nextPath); - } - } - - if (fsToPath.size() > 1) { - // Now, compute the LOWEST height for all these FSs - int lowest = -1; - for (Map.Entry>> e : fsToPath.entrySet()) { - if (lowest < 0 || e.getValue().size() < lowest) { - lowest = e.getValue().size(); - } - } - - // Now, we move up those path that has length larger than the lowest - for (Stack> st : fsToPath.values()) { - while (st.size() > lowest) { - st.pop(); - } - } - - // Now, we move all paths up together, until we reach a least common ancestor - Operator lca; - while (true) { - lca = null; - boolean same = true; - for (Stack> st : fsToPath.values()) { - Operator op = st.pop(); - if (lca == null) { - lca = op; - } else if (lca != op) { - same = false; // but we still need to pop the rest.. - } - } - if (same) { - break; - } - } - - Preconditions.checkArgument(lca.getNumChild() > 1, - "AssertionError: the LCA should have multiple children, but got " + lca.getNumChild()); - - // Special case: don't break if LCA is FOR. 
- if (!(lca instanceof ForwardOperator)) { - for (Operator childOp : lca.getChildOperators()) { - context.opToParentMap.put(childOp, lca); - } - } - } - - return null; - } -} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java index 05be1f1..8ada9fb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java @@ -113,6 +113,27 @@ public void addDummyOp(HashTableDummyOperator dummyOp) { return returnSet; } + public Set> getAllLeafOperators() { + Set> returnSet = new LinkedHashSet>(); + Set> opSet = getAllRootOperators(); + Stack> opStack = new Stack>(); + + // seed the stack with all the root operators + opStack.addAll(opSet); + + while (!opStack.empty()) { + Operator op = opStack.pop(); + if (op.getNumChild() == 0) { + returnSet.add(op); + } + if (op.getChildOperators() != null) { + opStack.addAll(op.getChildOperators()); + } + } + + return returnSet; + } + public Map> getScratchColumnVectorTypes() { return scratchColumnVectorTypes; } diff --git a/ql/src/test/queries/clientpositive/spark_multi_insert_split_work.q b/ql/src/test/queries/clientpositive/spark_multi_insert_split_work.q new file mode 100644 index 0000000..8091121 --- /dev/null +++ b/ql/src/test/queries/clientpositive/spark_multi_insert_split_work.q @@ -0,0 +1,22 @@ +create table src_multi1 like src; +create table src_multi2 like src; +create table src_multi3 like src; + +-- This is used to test that splitting SparkWork works correctly in multi-insertion. +-- In particular, if a MapWork/ReduceWork to be split has an FS, then there shouldn't be +-- duplicate copies of the FS. + +explain +from src +insert overwrite table src_multi1 select key, count(1) group by key order by key +insert overwrite table src_multi2 select value, count(1) group by value order by value +insert overwrite table src_multi3 select * where key < 10; + +from src +insert overwrite table src_multi1 select key, count(1) group by key order by key +insert overwrite table src_multi2 select value, count(1) group by value order by value +insert overwrite table src_multi3 select * where key < 10; + +select * from src_multi1; +select * from src_multi2; +select * from src_multi3; diff --git a/ql/src/test/results/clientpositive/spark/groupby7_map.q.out b/ql/src/test/results/clientpositive/spark/groupby7_map.q.out index 2d99a81..6174986 100644 --- a/ql/src/test/results/clientpositive/spark/groupby7_map.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby7_map.q.out @@ -30,39 +30,37 @@ INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY S POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-4 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-4, Stage-5 + Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 - Stage-6 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0 Stage-1 depends on stages: Stage-3 - Stage-7 depends on stages: Stage-1 - Stage-5 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: src - File Output Operator - compressed: true - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-4 - Spark Edges: - Reducer 4 <- Map 1 (GROUP, 31) +
Reducer 2 <- Map 1 (GROUP, 31) + Reducer 3 <- Map 1 (GROUP, 31) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: src + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Group By Operator + aggregations: sum(substr(value, 5)) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: double) Select Operator expressions: key (type: string), value (type: string) outputColumnNames: key, value @@ -76,7 +74,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) value expressions: _col1 (type: double) - Reducer 4 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -93,6 +91,23 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: double) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 Stage: Stage-3 Dependency Collection @@ -107,7 +122,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 - Stage: Stage-6 + Stage: Stage-4 Stats-Aggr Operator Stage: Stage-1 @@ -120,48 +135,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-7 - Stats-Aggr Operator - Stage: Stage-5 - Spark - Edges: - Reducer 5 <- Map 2 (GROUP, 31) -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Group By Operator - aggregations: sum(substr(value, 5)) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - value expressions: _col1 (type: double) - Reducer 5 - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: double) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 + Stats-Aggr Operator PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key diff --git a/ql/src/test/results/clientpositive/spark/groupby7_map_skew.q.out b/ql/src/test/results/clientpositive/spark/groupby7_map_skew.q.out index ca73985..f7892b3 100644 --- a/ql/src/test/results/clientpositive/spark/groupby7_map_skew.q.out +++ 
b/ql/src/test/results/clientpositive/spark/groupby7_map_skew.q.out @@ -30,33 +30,18 @@ INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY S POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-4 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-4, Stage-5 + Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 - Stage-6 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0 Stage-1 depends on stages: Stage-3 - Stage-7 depends on stages: Stage-1 - Stage-5 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: src - File Output Operator - compressed: true - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-4 - Spark Edges: + Reducer 2 <- Map 1 (GROUP SORT, 31) + Reducer 3 <- Reducer 2 (GROUP, 31) Reducer 4 <- Map 1 (GROUP SORT, 31) Reducer 5 <- Reducer 4 (GROUP, 31) #### A masked pattern was here #### @@ -64,6 +49,20 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan + alias: src + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Group By Operator + aggregations: sum(substr(value, 5)) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: rand() (type: double) + value expressions: _col1 (type: double) Select Operator expressions: key (type: string), value (type: string) outputColumnNames: key, value @@ -77,6 +76,35 @@ STAGE PLANS: sort order: + Map-reduce partition columns: rand() (type: double) value expressions: _col1 (type: double) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: partials + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: double) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: final + outputColumnNames: _col0, _col1 + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: double) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 Reducer 4 Reduce Operator Tree: Group By Operator @@ -105,7 +133,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 + name: default.dest2 Stage: Stage-3 Dependency Collection @@ -120,7 +148,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 - Stage: Stage-6 + Stage: Stage-4 Stats-Aggr Operator Stage: Stage-1 @@ -133,61 +161,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-7 - Stats-Aggr Operator - Stage: 
Stage-5 - Spark - Edges: - Reducer 6 <- Map 2 (GROUP SORT, 31) - Reducer 7 <- Reducer 6 (GROUP, 31) -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Group By Operator - aggregations: sum(substr(value, 5)) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: rand() (type: double) - value expressions: _col1 (type: double) - Reducer 6 - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string) - mode: partials - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - value expressions: _col1 (type: double) - Reducer 7 - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string) - mode: final - outputColumnNames: _col0, _col1 - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: double) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 + Stats-Aggr Operator PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key diff --git a/ql/src/test/results/clientpositive/spark/groupby7_noskew.q.out b/ql/src/test/results/clientpositive/spark/groupby7_noskew.q.out index 2d2c55b..e2ee201 100644 --- a/ql/src/test/results/clientpositive/spark/groupby7_noskew.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby7_noskew.q.out @@ -30,39 +30,24 @@ INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY S POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-4 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-4, Stage-5 + Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 - Stage-6 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0 Stage-1 depends on stages: Stage-3 - Stage-7 depends on stages: Stage-1 - Stage-5 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: src - File Output Operator - compressed: true - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-4 - Spark Edges: - Reducer 4 <- Map 1 (GROUP, 31) + Reducer 2 <- Map 1 (GROUP, 31) + Reducer 3 <- Map 1 (GROUP, 31) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: src Select Operator expressions: key (type: string), value (type: string) outputColumnNames: key, value @@ -71,7 +56,15 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) value expressions: substr(value, 5) (type: string) - Reducer 4 + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Reduce Output Operator + key expressions: key (type: string) + sort 
order: +
+                      Map-reduce partition columns: key (type: string)
+                      value expressions: substr(value, 5) (type: string)
+        Reducer 2
         Reduce Operator Tree:
           Group By Operator
             aggregations: sum(VALUE._col0)
@@ -88,6 +81,23 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest1
+        Reducer 3
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                keys: KEY._col0 (type: string)
+                mode: complete
+                outputColumnNames: _col0, _col1
+                Select Operator
+                  expressions: UDFToInteger(_col0) (type: int), _col1 (type: double)
+                  outputColumnNames: _col0, _col1
+                  File Output Operator
+                    compressed: true
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.dest2
 
   Stage: Stage-3
     Dependency Collection
@@ -102,7 +112,7 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest1
 
-  Stage: Stage-6
+  Stage: Stage-4
     Stats-Aggr Operator
 
   Stage: Stage-1
@@ -115,43 +125,8 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest2
 
-  Stage: Stage-7
-    Stats-Aggr Operator
-
   Stage: Stage-5
-    Spark
-      Edges:
-        Reducer 5 <- Map 2 (GROUP, 31)
-#### A masked pattern was here ####
-      Vertices:
-        Map 2
-            Map Operator Tree:
-                TableScan
-                  Select Operator
-                    expressions: key (type: string), value (type: string)
-                    outputColumnNames: key, value
-                    Reduce Output Operator
-                      key expressions: key (type: string)
-                      sort order: +
-                      Map-reduce partition columns: key (type: string)
-                      value expressions: substr(value, 5) (type: string)
-        Reducer 5
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: sum(VALUE._col0)
-                keys: KEY._col0 (type: string)
-                mode: complete
-                outputColumnNames: _col0, _col1
-                Select Operator
-                  expressions: UDFToInteger(_col0) (type: int), _col1 (type: double)
-                  outputColumnNames: _col0, _col1
-                  File Output Operator
-                    compressed: true
-                    table:
-                        input format: org.apache.hadoop.mapred.TextInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                        name: default.dest2
+    Stats-Aggr Operator
 
 PREHOOK: query: FROM SRC
 INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key
diff --git a/ql/src/test/results/clientpositive/spark/groupby_cube1.q.out b/ql/src/test/results/clientpositive/spark/groupby_cube1.q.out
index 942cdaa..801a3ed 100644
--- a/ql/src/test/results/clientpositive/spark/groupby_cube1.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby_cube1.q.out
@@ -365,33 +365,18 @@ INSERT OVERWRITE TABLE T3 SELECT key, val, sum(1) group by key, val with cube
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
-  Stage-4 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-4, Stage-5
+  Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
-  Stage-6 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-0
   Stage-1 depends on stages: Stage-3
-  Stage-7 depends on stages: Stage-1
-  Stage-5 depends on stages: Stage-2
+  Stage-5 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-2
     Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 3
-            Map Operator Tree:
-                TableScan
-                  alias: t1
-                  File Output Operator
-                    compressed: false
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-4
-    Spark
       Edges:
+        Reducer 2 <- Map 1 (GROUP SORT, 1)
+        Reducer 3 <- Reducer 2 (GROUP SORT, 1)
         Reducer 4 <- Map 1 (GROUP SORT, 1)
         Reducer 5 <- Reducer 4 (GROUP SORT, 1)
 #### A masked pattern was here ####
@@ -399,6 +384,7 @@ STAGE PLANS:
         Map 1
             Map Operator Tree:
                 TableScan
+                  alias: t1
                   Select Operator
                     expressions: key (type: string), val (type: string)
                     outputColumnNames: key, val
@@ -412,7 +398,20 @@ STAGE PLANS:
                       sort order: +++
                       Map-reduce partition columns: rand() (type: double)
                       value expressions: _col3 (type: bigint)
-        Reducer 4
+                  Select Operator
+                    expressions: key (type: string), val (type: string)
+                    outputColumnNames: key, val
+                    Group By Operator
+                      aggregations: sum(1)
+                      keys: key (type: string), val (type: string), '0' (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                        sort order: +++
+                        Map-reduce partition columns: rand() (type: double)
+                        value expressions: _col3 (type: bigint)
+        Reducer 2
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
@@ -424,7 +423,7 @@ STAGE PLANS:
                   sort order: +++
                   Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
                   value expressions: _col3 (type: bigint)
-        Reducer 5
+        Reducer 3
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
@@ -441,60 +440,7 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.t2
-
-  Stage: Stage-3
-    Dependency Collection
-
-  Stage: Stage-0
-    Move Operator
-      tables:
-          replace: true
-          table:
-              input format: org.apache.hadoop.mapred.TextInputFormat
-              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              name: default.t2
-
-  Stage: Stage-6
-    Stats-Aggr Operator
-
-  Stage: Stage-1
-    Move Operator
-      tables:
-          replace: true
-          table:
-              input format: org.apache.hadoop.mapred.TextInputFormat
-              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              name: default.t3
-
-  Stage: Stage-7
-    Stats-Aggr Operator
-
-  Stage: Stage-5
-    Spark
-      Edges:
-        Reducer 6 <- Map 2 (GROUP SORT, 1)
-        Reducer 7 <- Reducer 6 (GROUP SORT, 1)
-#### A masked pattern was here ####
-      Vertices:
-        Map 2
-            Map Operator Tree:
-                TableScan
-                  Select Operator
-                    expressions: key (type: string), val (type: string)
-                    outputColumnNames: key, val
-                    Group By Operator
-                      aggregations: sum(1)
-                      keys: key (type: string), val (type: string), '0' (type: string)
-                      mode: hash
-                      outputColumnNames: _col0, _col1, _col2, _col3
-                      Reduce Output Operator
-                        key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
-                        sort order: +++
-                        Map-reduce partition columns: rand() (type: double)
-                        value expressions: _col3 (type: bigint)
-        Reducer 6
+        Reducer 4
             Reduce Operator Tree:
               Group By Operator
                 aggregations: sum(VALUE._col0)
@@ -506,7 +452,7 @@ STAGE PLANS:
                   sort order: +++
                   Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
                   value expressions: _col3 (type: bigint)
-        Reducer 7
+        Reducer 5
            Reduce Operator Tree:
              Group By Operator
                aggregations: sum(VALUE._col0)
@@ -524,6 +470,35 @@ STAGE PLANS:
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.t3
 
+  Stage: Stage-3
+    Dependency Collection
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.t2
+
+  Stage: Stage-4
+    Stats-Aggr Operator
+
+  Stage: Stage-1
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.t3
+
+  Stage: Stage-5
+    Stats-Aggr Operator
+
 PREHOOK: query: FROM T1
 INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with cube
 INSERT OVERWRITE TABLE T3 SELECT key, val, sum(1) group by key, val with cube
diff --git a/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer.q.out b/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer.q.out
index 399fe41..b4bcf05 100644
--- a/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer.q.out
@@ -276,45 +276,31 @@ INSERT OVERWRITE TABLE dest_h3 SELECT substr(src.key,1,1), count(DISTINCT substr
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-5 is a root stage
-  Stage-7 depends on stages: Stage-5
-  Stage-6 depends on stages: Stage-7, Stage-8
+  Stage-6 depends on stages: Stage-5
   Stage-0 depends on stages: Stage-6
-  Stage-9 depends on stages: Stage-0
+  Stage-7 depends on stages: Stage-0
   Stage-1 depends on stages: Stage-6
-  Stage-10 depends on stages: Stage-1
+  Stage-8 depends on stages: Stage-1
   Stage-2 depends on stages: Stage-6
-  Stage-11 depends on stages: Stage-2
+  Stage-9 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-6
-  Stage-12 depends on stages: Stage-3
+  Stage-10 depends on stages: Stage-3
   Stage-4 depends on stages: Stage-6
-  Stage-13 depends on stages: Stage-4
-  Stage-8 depends on stages: Stage-5
+  Stage-11 depends on stages: Stage-4
 
 STAGE PLANS:
   Stage: Stage-5
     Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 3
-            Map Operator Tree:
-                TableScan
-                  alias: src
-                  File Output Operator
-                    compressed: false
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-7
-    Spark
       Edges:
-        Reducer 4 <- Map 1 (GROUP SORT, 1)
+        Reducer 2 <- Map 1 (GROUP SORT, 1)
+        Reducer 3 <- Map 1 (GROUP SORT, 1)
+        Reducer 4 <- Reducer 3 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1
            Map Operator Tree:
                TableScan
+                  alias: src
                   Select Operator
                     expressions: key (type: string), value (type: string)
                     outputColumnNames: key, value
@@ -323,7 +309,15 @@ STAGE PLANS:
                       sort order: ++
                       Map-reduce partition columns: substr(key, 1, 1) (type: string)
                       value expressions: value (type: string)
-        Reducer 4
+                  Select Operator
+                    expressions: key (type: string), value (type: string)
+                    outputColumnNames: key, value
+                    Reduce Output Operator
+                      key expressions: substr(key, 1, 1) (type: string), substr(key, 2, 1) (type: string), substr(value, 5) (type: string)
+                      sort order: +++
+                      Map-reduce partition columns: substr(key, 1, 1) (type: string), substr(key, 2, 1) (type: string)
+                      value expressions: value (type: string)
+        Reducer 2
             Reduce Operator Tree:
              Forward
                Filter Operator
@@ -375,6 +369,56 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest_g4
+        Reducer 3
+            Reduce Operator Tree:
+              Forward
+                Group By Operator
+                  aggregations: count(DISTINCT KEY._col2:0._col0), sum(KEY._col2:0._col0), count(VALUE._col0)
+                  keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                  mode: complete
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                  Select Operator
+                    expressions: _col0 (type: string), _col2 (type: bigint), concat(_col0, _col3) (type: string), _col3 (type: double), _col4 (type: bigint)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                    Limit
+                      Number of rows: 10
+                      Reduce Output Operator
+                        sort order: 
+                        value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: double), _col4 (type: bigint)
+                Filter Operator
+                  predicate: (KEY._col0 >= 5) (type: boolean)
+                  Group By Operator
+                    aggregations: count(DISTINCT KEY._col2:0._col0), sum(KEY._col2:0._col0), count(VALUE._col0)
+                    keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                    mode: complete
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                    Select Operator
+                      expressions: _col0 (type: string), UDFToInteger(_col2) (type: int), concat(_col0, _col3) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int)
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                      File Output Operator
+                        compressed: false
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                            name: default.dest_h3
+        Reducer 4
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: string), VALUE._col1 (type: bigint), VALUE._col2 (type: string), VALUE._col3 (type: double), VALUE._col4 (type: bigint)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Limit
+                  Number of rows: 10
+                  Select Operator
+                    expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                    File Output Operator
+                      compressed: false
+                      table:
+                          input format: org.apache.hadoop.mapred.TextInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                          name: default.dest_h2
 
   Stage: Stage-6
     Dependency Collection
@@ -389,7 +433,7 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest_g2
 
-  Stage: Stage-9
+  Stage: Stage-7
     Stats-Aggr Operator
 
   Stage: Stage-1
@@ -402,7 +446,7 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest_g3
 
-  Stage: Stage-10
+  Stage: Stage-8
     Stats-Aggr Operator
 
   Stage: Stage-2
@@ -415,7 +459,7 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest_g4
 
-  Stage: Stage-11
+  Stage: Stage-9
     Stats-Aggr Operator
 
   Stage: Stage-3
@@ -428,7 +472,7 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest_h2
 
-  Stage: Stage-12
+  Stage: Stage-10
     Stats-Aggr Operator
 
   Stage: Stage-4
@@ -441,78 +485,9 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest_h3
 
-  Stage: Stage-13
+  Stage: Stage-11
     Stats-Aggr Operator
 
-  Stage: Stage-8
-    Spark
-      Edges:
-        Reducer 5 <- Map 2 (GROUP SORT, 1)
-        Reducer 6 <- Reducer 5 (GROUP, 1)
-#### A masked pattern was here ####
-      Vertices:
-        Map 2
-            Map Operator Tree:
-                TableScan
-                  Select Operator
-                    expressions: key (type: string), value (type: string)
-                    outputColumnNames: key, value
-                    Reduce Output Operator
-                      key expressions: substr(key, 1, 1) (type: string), substr(key, 2, 1) (type: string), substr(value, 5) (type: string)
-                      sort order: +++
-                      Map-reduce partition columns: substr(key, 1, 1) (type: string), substr(key, 2, 1) (type: string)
-                      value expressions: value (type: string)
-        Reducer 5
-            Reduce Operator Tree:
-              Forward
-                Group By Operator
-                  aggregations: count(DISTINCT KEY._col2:0._col0), sum(KEY._col2:0._col0), count(VALUE._col0)
-                  keys: KEY._col0 (type: string), KEY._col1 (type: string)
-                  mode: complete
-                  outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                  Select Operator
-                    expressions: _col0 (type: string), _col2 (type: bigint), concat(_col0, _col3) (type: string), _col3 (type: double), _col4 (type: bigint)
-                    outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                    Limit
-                      Number of rows: 10
-                      Reduce Output Operator
-                        sort order: 
-                        value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: double), _col4 (type: bigint)
-                Filter Operator
-                  predicate: (KEY._col0 >= 5) (type: boolean)
-                  Group By Operator
-                    aggregations: count(DISTINCT KEY._col2:0._col0), sum(KEY._col2:0._col0), count(VALUE._col0)
-                    keys: KEY._col0 (type: string), KEY._col1 (type: string)
-                    mode: complete
-                    outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                    Select Operator
-                      expressions: _col0 (type: string), UDFToInteger(_col2) (type: int), concat(_col0, _col3) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int)
-                      outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                      File Output Operator
-                        compressed: false
-                        table:
-                            input format: org.apache.hadoop.mapred.TextInputFormat
-                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                            name: default.dest_h3
-        Reducer 6
-            Reduce Operator Tree:
-              Select Operator
-                expressions: VALUE._col0 (type: string), VALUE._col1 (type: bigint), VALUE._col2 (type: string), VALUE._col3 (type: double), VALUE._col4 (type: bigint)
-                outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                Limit
-                  Number of rows: 10
-                  Select Operator
-                    expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int)
-                    outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                    File Output Operator
-                      compressed: false
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.dest_h2
-
 PREHOOK: query: FROM src
 INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1)
 INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1)
diff --git a/ql/src/test/results/clientpositive/spark/groupby_position.q.out b/ql/src/test/results/clientpositive/spark/groupby_position.q.out
index 5e68807..e8ca380 100644
--- a/ql/src/test/results/clientpositive/spark/groupby_position.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby_position.q.out
@@ -30,39 +30,24 @@ INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBS
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
-  Stage-4 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-4, Stage-5
+  Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
-  Stage-6 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-0
   Stage-1 depends on stages: Stage-3
-  Stage-7 depends on stages: Stage-1
-  Stage-5 depends on stages: Stage-2
+  Stage-5 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-2
     Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 3
-            Map Operator Tree:
-                TableScan
-                  alias: src
-                  File Output Operator
-                    compressed: false
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-4
-    Spark
       Edges:
-        Reducer 4 <- Map 1 (GROUP SORT, 1)
+        Reducer 2 <- Map 1 (GROUP SORT, 1)
+        Reducer 3 <- Map 1 (GROUP SORT, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1
            Map Operator Tree:
                TableScan
+                  alias: src
                   Filter Operator
                     predicate: (key < 20) (type: boolean)
                     Select Operator
@@ -77,7 +62,21 @@ STAGE PLANS:
                         key expressions: _col0 (type: string), _col1 (type: string)
                         sort order: ++
                         Map-reduce partition columns: _col0 (type: string)
-        Reducer 4
+                  Filter Operator
+                    predicate: (key < 20) (type: boolean)
+                    Select Operator
+                      expressions: key (type: string), value (type: string)
+                      outputColumnNames: key, value
+                      Group By Operator
+                        aggregations: count(DISTINCT substr(value, 5))
+                        keys: key (type: string), value (type: string), substr(value, 5) (type: string)
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2, _col3
+                        Reduce Output Operator
+                          key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                          sort order: +++
+                          Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+        Reducer 2
             Reduce Operator Tree:
              Group By Operator
                aggregations: count(DISTINCT KEY._col1:0._col0)
@@ -94,6 +93,23 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.testtable1
+        Reducer 3
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(DISTINCT KEY._col2:0._col0)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Select Operator
+                  expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), _col2 (type: bigint)
+                  outputColumnNames: _col0, _col1, _col2
+                  File Output Operator
+                    compressed: false
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.testtable2
 
   Stage: Stage-3
     Dependency Collection
@@ -108,7 +124,7 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.testtable1
 
-  Stage: Stage-6
+  Stage: Stage-4
     Stats-Aggr Operator
 
   Stage: Stage-1
@@ -121,49 +137,8 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.testtable2
 
-  Stage: Stage-7
-    Stats-Aggr Operator
-
   Stage: Stage-5
-    Spark
-      Edges:
-        Reducer 5 <- Map 2 (GROUP SORT, 1)
-#### A masked pattern was here ####
-      Vertices:
-        Map 2
-            Map Operator Tree:
-                TableScan
-                  Filter Operator
-                    predicate: (key < 20) (type: boolean)
-                    Select Operator
-                      expressions: key (type: string), value (type: string)
-                      outputColumnNames: key, value
-                      Group By Operator
-                        aggregations: count(DISTINCT substr(value, 5))
-                        keys: key (type: string), value (type: string), substr(value, 5) (type: string)
-                        mode: hash
-                        outputColumnNames: _col0, _col1, _col2, _col3
-                        Reduce Output Operator
-                          key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
-                          sort order: +++
-                          Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
-        Reducer 5
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(DISTINCT KEY._col2:0._col0)
-                keys: KEY._col0 (type: string), KEY._col1 (type: string)
-                mode: mergepartial
-                outputColumnNames: _col0, _col1, _col2
-                Select Operator
-                  expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), _col2 (type: bigint)
-                  outputColumnNames: _col0, _col1, _col2
-                  File Output Operator
-                    compressed: false
-                    table:
-                        input format: org.apache.hadoop.mapred.TextInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                        name: default.testtable2
+    Stats-Aggr Operator
 
 PREHOOK: query: FROM SRC
 INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1
@@ -240,39 +215,24 @@ INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBS
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
-  Stage-4 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-4, Stage-5
+  Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
-  Stage-6 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-0
   Stage-1 depends on stages: Stage-3
-  Stage-7 depends on stages: Stage-1
-  Stage-5 depends on stages: Stage-2
+  Stage-5 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-2
     Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 3
-            Map Operator Tree:
-                TableScan
-                  alias: src
-                  File Output Operator
-                    compressed: false
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-4
-    Spark
       Edges:
-        Reducer 4 <- Map 1 (GROUP SORT, 1)
+        Reducer 2 <- Map 1 (GROUP SORT, 1)
+        Reducer 3 <- Map 1 (GROUP SORT, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1
            Map Operator Tree:
                TableScan
+                  alias: src
                   Filter Operator
                     predicate: (key < 20) (type: boolean)
                     Select Operator
@@ -287,7 +247,21 @@ STAGE PLANS:
                         key expressions: _col0 (type: string), _col1 (type: string)
                         sort order: ++
                         Map-reduce partition columns: _col0 (type: string)
-        Reducer 4
+                  Filter Operator
+                    predicate: (key < 20) (type: boolean)
+                    Select Operator
+                      expressions: value (type: string), key (type: string)
+                      outputColumnNames: value, key
+                      Group By Operator
+                        aggregations: count(DISTINCT substr(value, 5))
+                        keys: value (type: string), key (type: string), substr(value, 5) (type: string)
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2, _col3
+                        Reduce Output Operator
+                          key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                          sort order: +++
+                          Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+        Reducer 2
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(DISTINCT KEY._col1:0._col0)
@@ -304,6 +278,23 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.testtable1
+        Reducer 3
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(DISTINCT KEY._col2:0._col0)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Select Operator
+                  expressions: UDFToInteger(_col1) (type: int), _col0 (type: string), _col2 (type: bigint)
+                  outputColumnNames: _col0, _col1, _col2
+                  File Output Operator
+                    compressed: false
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.testtable2
 
   Stage: Stage-3
     Dependency Collection
@@ -318,7 +309,7 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.testtable1
 
-  Stage: Stage-6
+  Stage: Stage-4
     Stats-Aggr Operator
 
   Stage: Stage-1
@@ -331,49 +322,8 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.testtable2
 
-  Stage: Stage-7
-    Stats-Aggr Operator
-
   Stage: Stage-5
-    Spark
-      Edges:
-        Reducer 5 <- Map 2 (GROUP SORT, 1)
-#### A masked pattern was here ####
-      Vertices:
-        Map 2
-            Map Operator Tree:
-                TableScan
-                  Filter Operator
-                    predicate: (key < 20) (type: boolean)
-                    Select Operator
-                      expressions: value (type: string), key (type: string)
-                      outputColumnNames: value, key
-                      Group By Operator
-                        aggregations: count(DISTINCT substr(value, 5))
-                        keys: value (type: string), key (type: string), substr(value, 5) (type: string)
-                        mode: hash
-                        outputColumnNames: _col0, _col1, _col2, _col3
-                        Reduce Output Operator
-                          key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
-                          sort order: +++
-                          Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
-        Reducer 5
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(DISTINCT KEY._col2:0._col0)
-                keys: KEY._col0 (type: string), KEY._col1 (type: string)
-                mode: mergepartial
-                outputColumnNames: _col0, _col1, _col2
-                Select Operator
-                  expressions: UDFToInteger(_col1) (type: int), _col0 (type: string), _col2 (type: bigint)
-                  outputColumnNames: _col0, _col1, _col2
-                  File Output Operator
-                    compressed: false
-                    table:
-                        input format: org.apache.hadoop.mapred.TextInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                        name: default.testtable2
+    Stats-Aggr Operator
 
 PREHOOK: query: FROM SRC
 INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1
diff --git a/ql/src/test/results/clientpositive/spark/groupby_rollup1.q.out b/ql/src/test/results/clientpositive/spark/groupby_rollup1.q.out
index 4259412..7182a23 100644
--- a/ql/src/test/results/clientpositive/spark/groupby_rollup1.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby_rollup1.q.out
@@ -353,33 +353,18 @@ INSERT OVERWRITE TABLE T3 SELECT key, val, sum(1) group by key, val with rollup
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
-  Stage-4 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-4, Stage-5
+  Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
-  Stage-6 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-0
   Stage-1 depends on stages: Stage-3
-  Stage-7 depends on stages: Stage-1
-  Stage-5 depends on stages: Stage-2
+  Stage-5 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-2
     Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 3
-            Map Operator Tree:
-                TableScan
-                  alias: t1
-                  File Output Operator
-                    compressed: false
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-4
-    Spark
       Edges:
+        Reducer 2 <- Map 1 (GROUP SORT, 1)
+        Reducer 3 <- Reducer 2 (GROUP SORT, 1)
         Reducer 4 <- Map 1 (GROUP SORT, 1)
         Reducer 5 <- Reducer 4 (GROUP SORT, 1)
 #### A masked pattern was here ####
@@ -387,6 +372,7 @@ STAGE PLANS:
         Map 1
            Map Operator Tree:
                TableScan
+                  alias: t1
                   Select Operator
                     expressions: key (type: string), val (type: string)
                     outputColumnNames: key, val
@@ -400,7 +386,20 @@ STAGE PLANS:
                       sort order: +++
                       Map-reduce partition columns: rand() (type: double)
                       value expressions: _col3 (type: bigint)
-        Reducer 4
+                  Select Operator
+                    expressions: key (type: string), val (type: string)
+                    outputColumnNames: key, val
+                    Group By Operator
+                      aggregations: sum(1)
+                      keys: key (type: string), val (type: string), '0' (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                        sort order: +++
+                        Map-reduce partition columns: rand() (type: double)
+                        value expressions: _col3 (type: bigint)
+        Reducer 2
             Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
@@ -412,7 +411,7 @@ STAGE PLANS:
                   sort order: +++
                   Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
                   value expressions: _col3 (type: bigint)
-        Reducer 5
+        Reducer 3
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
@@ -429,60 +428,7 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.t2
-
-  Stage: Stage-3
-    Dependency Collection
-
-  Stage: Stage-0
-    Move Operator
-      tables:
-          replace: true
-          table:
-              input format: org.apache.hadoop.mapred.TextInputFormat
-              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              name: default.t2
-
-  Stage: Stage-6
-    Stats-Aggr Operator
-
-  Stage: Stage-1
-    Move Operator
-      tables:
-          replace: true
-          table:
-              input format: org.apache.hadoop.mapred.TextInputFormat
-              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              name: default.t3
-
-  Stage: Stage-7
-    Stats-Aggr Operator
-
-  Stage: Stage-5
-    Spark
-      Edges:
-        Reducer 6 <- Map 2 (GROUP SORT, 1)
-        Reducer 7 <- Reducer 6 (GROUP SORT, 1)
-#### A masked pattern was here ####
-      Vertices:
-        Map 2
-            Map Operator Tree:
-                TableScan
-                  Select Operator
-                    expressions: key (type: string), val (type: string)
-                    outputColumnNames: key, val
-                    Group By Operator
-                      aggregations: sum(1)
-                      keys: key (type: string), val (type: string), '0' (type: string)
-                      mode: hash
-                      outputColumnNames: _col0, _col1, _col2, _col3
-                      Reduce Output Operator
-                        key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
-                        sort order: +++
-                        Map-reduce partition columns: rand() (type: double)
-                        value expressions: _col3 (type: bigint)
-        Reducer 6
+        Reducer 4
            Reduce Operator Tree:
              Group By Operator
                aggregations: sum(VALUE._col0)
@@ -494,7 +440,7 @@ STAGE PLANS:
                   sort order: +++
                   Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
                   value expressions: _col3 (type: bigint)
-        Reducer 7
+        Reducer 5
            Reduce Operator Tree:
              Group By Operator
                aggregations: sum(VALUE._col0)
@@ -512,6 +458,35 @@ STAGE PLANS:
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.t3
 
+  Stage: Stage-3
+    Dependency Collection
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.t2
+
+  Stage: Stage-4
+    Stats-Aggr Operator
+
+  Stage: Stage-1
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.t3
+
+  Stage: Stage-5
+    Stats-Aggr Operator
+
 PREHOOK: query: FROM T1
 INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with rollup
 INSERT OVERWRITE TABLE T3 SELECT key, val, sum(1) group by key, val with rollup
diff --git a/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out b/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out
index e0e882e..879dd93 100644
--- a/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out
@@ -4557,39 +4557,23 @@ INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
-  Stage-4 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-4, Stage-5
+  Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
-  Stage-6 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-0
   Stage-1 depends on stages: Stage-3
-  Stage-7 depends on stages: Stage-1
-  Stage-5 depends on stages: Stage-2
+  Stage-5 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-2
     Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 3
-            Map Operator Tree:
-                TableScan
-                  alias: t2
-                  File Output Operator
-                    compressed: true
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-4
-    Spark
       Edges:
-        Reducer 4 <- Map 1 (GROUP, 31)
+        Reducer 2 <- Map 1 (GROUP, 31)
 #### A masked pattern was here ####
       Vertices:
         Map 1
            Map Operator Tree:
                TableScan
+                  alias: t2
                   Select Operator
                     expressions: key (type: string)
                     outputColumnNames: key
@@ -4604,7 +4588,25 @@ STAGE PLANS:
                       sort order: +
                       Map-reduce partition columns: _col0 (type: string)
                       value expressions: _col1 (type: bigint)
-        Reducer 4
+                  Select Operator
+                    expressions: key (type: string), val (type: string)
+                    outputColumnNames: key, val
+                    Group By Operator
+                      aggregations: count(1)
+                      keys: key (type: string), val (type: string)
+                      mode: final
+                      outputColumnNames: _col0, _col1, _col2
+                      Select Operator
+                        expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int)
+                        outputColumnNames: _col0, _col1, _col2
+                        File Output Operator
+                          compressed: true
+                          table:
+                              input format: org.apache.hadoop.mapred.TextInputFormat
+                              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                              name: default.dest2
+        Reducer 2
             Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
@@ -4635,7 +4637,7 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest1
 
-  Stage: Stage-6
+  Stage: Stage-4
     Stats-Aggr Operator
 
   Stage: Stage-1
@@ -4648,34 +4650,8 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest2
 
-  Stage: Stage-7
-    Stats-Aggr Operator
-
   Stage: Stage-5
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 2
-            Map Operator Tree:
-                TableScan
-                  Select Operator
-                    expressions: key (type: string), val (type: string)
-                    outputColumnNames: key, val
-                    Group By Operator
-                      aggregations: count(1)
-                      keys: key (type: string), val (type: string)
-                      mode: final
-                      outputColumnNames: _col0, _col1, _col2
-                      Select Operator
-                        expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int)
-                        outputColumnNames: _col0, _col1, _col2
-                        File Output Operator
-                          compressed: true
-                          table:
-                              input format: org.apache.hadoop.mapred.TextInputFormat
-                              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                              name: default.dest2
+    Stats-Aggr Operator
 
 PREHOOK: query: FROM T2
 INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key
@@ -4739,20 +4715,20 @@ INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
-  Stage-4 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-4, Stage-5
+  Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
-  Stage-6 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-0
   Stage-1 depends on stages: Stage-3
-  Stage-7 depends on stages: Stage-1
-  Stage-5 depends on stages: Stage-2
+  Stage-5 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-2
     Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 31)
 #### A masked pattern was here ####
       Vertices:
-        Map 3
+        Map 1
            Map Operator Tree:
                TableScan
                  alias: t2
@@ -4761,37 +4737,39 @@ STAGE PLANS:
                     Select Operator
                       expressions: val (type: string)
                       outputColumnNames: _col1
-                    File Output Operator
-                      compressed: true
-                      table:
-                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-4
-    Spark
-      Edges:
-        Reducer 4 <- Map 1 (GROUP, 31)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1
-            Map Operator Tree:
-                TableScan
-                  Select Operator
-                    expressions: '8' (type: string)
-                    outputColumnNames: _col0
-                    Group By Operator
-                      aggregations: count(1)
-                      bucketGroup: true
-                      keys: _col0 (type: string)
-                      mode: hash
-                      outputColumnNames: _col0, _col1
-                      Reduce Output Operator
-                        key expressions: _col0 (type: string)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: string)
-                        value expressions: _col1 (type: bigint)
-        Reducer 4
+                    Select Operator
+                      expressions: '8' (type: string)
+                      outputColumnNames: _col0
+                      Group By Operator
+                        aggregations: count(1)
+                        bucketGroup: true
+                        keys: _col0 (type: string)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Reduce Output Operator
+                          key expressions: _col0 (type: string)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: string)
+                          value expressions: _col1 (type: bigint)
+                    Select Operator
+                      expressions: '8' (type: string), _col1 (type: string)
+                      outputColumnNames: _col0, _col1
+                      Group By Operator
+                        aggregations: count(1)
+                        keys: _col0 (type: string), _col1 (type: string)
+                        mode: final
+                        outputColumnNames: _col0, _col1, _col2
+                        Select Operator
+                          expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int)
+                          outputColumnNames: _col0, _col1, _col2
+                          File Output Operator
+                            compressed: true
+                            table:
+                                input format: org.apache.hadoop.mapred.TextInputFormat
+                                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                                name: default.dest2
+        Reducer 2
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
@@ -4822,7 +4800,7 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest1
 
-  Stage: Stage-6
+  Stage: Stage-4
     Stats-Aggr Operator
 
   Stage: Stage-1
@@ -4835,34 +4813,8 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest2
 
-  Stage: Stage-7
-    Stats-Aggr Operator
-
   Stage: Stage-5
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 2
-            Map Operator Tree:
-                TableScan
-                  Select Operator
-                    expressions: '8' (type: string), _col1 (type: string)
-                    outputColumnNames: _col0, _col1
-                    Group By Operator
-                      aggregations: count(1)
-                      keys: _col0 (type: string), _col1 (type: string)
-                      mode: final
-                      outputColumnNames: _col0, _col1, _col2
-                      Select Operator
-                        expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int)
-                        outputColumnNames: _col0, _col1, _col2
-                        File Output Operator
-                          compressed: true
-                          table:
-                              input format: org.apache.hadoop.mapred.TextInputFormat
-                              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                              name: default.dest2
+    Stats-Aggr Operator
 
 PREHOOK: query: FROM (select key, val from T2 where key = 8) x
 INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key
diff --git a/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out b/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out
index a43921e..e1b724e 100644
--- a/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out
@@ -4669,40 +4669,24 @@ INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
-  Stage-4 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-4, Stage-5
+  Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
-  Stage-6 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-0
   Stage-1 depends on stages: Stage-3
-  Stage-7 depends on stages: Stage-1
-  Stage-5 depends on stages: Stage-2
+  Stage-5 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-2
     Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 3
-            Map Operator Tree:
-                TableScan
-                  alias: t2
-                  File Output Operator
-                    compressed: true
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-4
-    Spark
       Edges:
-        Reducer 4 <- Map 1 (GROUP SORT, 31)
-        Reducer 5 <- Reducer 4 (GROUP, 31)
+        Reducer 2 <- Map 1 (GROUP SORT, 31)
+        Reducer 3 <- Reducer 2 (GROUP, 31)
 #### A masked pattern was here ####
       Vertices:
         Map 1
            Map Operator Tree:
                TableScan
+                  alias: t2
                   Select Operator
                     expressions: key (type: string)
                     outputColumnNames: key
@@ -4717,7 +4701,25 @@ STAGE PLANS:
                       sort order: +
                       Map-reduce partition columns: rand() (type: double)
                       value expressions: _col1 (type: bigint)
-        Reducer 4
+                  Select Operator
+                    expressions: key (type: string), val (type: string)
+                    outputColumnNames: key, val
+                    Group By Operator
+                      aggregations: count(1)
+                      keys: key (type: string), val (type: string)
+                      mode: final
+                      outputColumnNames: _col0, _col1, _col2
+                      Select Operator
+                        expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int)
+                        outputColumnNames: _col0, _col1, _col2
+                        File Output Operator
+                          compressed: true
+                          table:
+                              input format: org.apache.hadoop.mapred.TextInputFormat
+                              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                              name: default.dest2
+        Reducer 2
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
@@ -4729,7 +4731,7 @@ STAGE PLANS:
                   sort order: +
                   Map-reduce partition columns: _col0 (type: string)
                   value expressions: _col1 (type: bigint)
-        Reducer 5
+        Reducer 3
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
@@ -4760,7 +4762,7 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest1
 
-  Stage: Stage-6
+  Stage: Stage-4
     Stats-Aggr Operator
 
   Stage: Stage-1
@@ -4773,34 +4775,8 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest2
 
-  Stage: Stage-7
-    Stats-Aggr Operator
-
   Stage: Stage-5
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 2
-            Map Operator Tree:
-                TableScan
-                  Select Operator
-                    expressions: key (type: string), val (type: string)
-                    outputColumnNames: key, val
-                    Group By Operator
-                      aggregations: count(1)
-                      keys: key (type: string), val (type: string)
-                      mode: final
-                      outputColumnNames: _col0, _col1, _col2
-                      Select Operator
-                        expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int)
-                        outputColumnNames: _col0, _col1, _col2
-                        File Output Operator
-                          compressed: true
-                          table:
-                              input format: org.apache.hadoop.mapred.TextInputFormat
-                              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                              name: default.dest2
+    Stats-Aggr Operator
 
 PREHOOK: query: FROM T2
 INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key
@@ -4864,20 +4840,21 @@ INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
-  Stage-4 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-4, Stage-5
+  Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
-  Stage-6 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-0
   Stage-1 depends on stages: Stage-3
-  Stage-7 depends on stages: Stage-1
-  Stage-5 depends on stages: Stage-2
+  Stage-5 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-2
     Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP SORT, 31)
+        Reducer 3 <- Reducer 2 (GROUP, 31)
 #### A masked pattern was here ####
       Vertices:
-        Map 3
+        Map 1
            Map Operator Tree:
                TableScan
                  alias: t2
@@ -4886,38 +4863,39 @@ STAGE PLANS:
                     Select Operator
                       expressions: val (type: string)
                       outputColumnNames: _col1
-                    File Output Operator
-                      compressed: true
-                      table:
-                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-4
-    Spark
-      Edges:
-        Reducer 4 <- Map 1 (GROUP SORT, 31)
-        Reducer 5 <- Reducer 4 (GROUP, 31)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1
-            Map Operator Tree:
-                TableScan
-                  Select Operator
-                    expressions: '8' (type: string)
-                    outputColumnNames: _col0
-                    Group By Operator
-                      aggregations: count(1)
-                      bucketGroup: true
-                      keys: _col0 (type: string)
-                      mode: hash
-                      outputColumnNames: _col0, _col1
-                      Reduce Output Operator
-                        key expressions: _col0 (type: string)
-                        sort order: +
-                        Map-reduce partition columns: rand() (type: double)
-                        value expressions: _col1 (type: bigint)
-        Reducer 4
+                    Select Operator
+                      expressions: '8' (type: string)
+                      outputColumnNames: _col0
+                      Group By Operator
+                        aggregations: count(1)
+                        bucketGroup: true
+                        keys: _col0 (type: string)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Reduce Output Operator
+                          key expressions: _col0 (type: string)
+                          sort order: +
+                          Map-reduce partition columns: rand() (type: double)
+                          value expressions: _col1 (type: bigint)
+                    Select Operator
+                      expressions: '8' (type: string), _col1 (type: string)
+                      outputColumnNames: _col0, _col1
+                      Group By Operator
+                        aggregations: count(1)
+                        keys: _col0 (type: string), _col1 (type: string)
+                        mode: final
+                        outputColumnNames: _col0, _col1, _col2
+                        Select Operator
+                          expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int)
+                          outputColumnNames: _col0, _col1, _col2
+                          File Output Operator
+                            compressed: true
+                            table:
+                                input format: org.apache.hadoop.mapred.TextInputFormat
+                                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                                name: default.dest2
+        Reducer 2
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
@@ -4929,7 +4907,7 @@ STAGE PLANS:
                   sort order: +
                   Map-reduce partition columns: _col0 (type: string)
                   value expressions: _col1 (type: bigint)
-        Reducer 5
+        Reducer 3
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
@@ -4960,7 +4938,7 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest1
 
-  Stage: Stage-6
+  Stage: Stage-4
     Stats-Aggr Operator
 
   Stage: Stage-1
@@ -4973,34 +4951,8 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest2
 
-  Stage: Stage-7
-    Stats-Aggr Operator
-
   Stage: Stage-5
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 2
-            Map Operator Tree:
-                TableScan
-                  Select Operator
-                    expressions: '8' (type: string), _col1 (type: string)
-                    outputColumnNames: _col0, _col1
-                    Group By Operator
-                      aggregations: count(1)
-                      keys: _col0 (type: string), _col1 (type: string)
-                      mode: final
-                      outputColumnNames: _col0, _col1, _col2
-                      Select Operator
-                        expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int)
-                        outputColumnNames: _col0, _col1, _col2
-                        File Output Operator
-                          compressed: true
-                          table:
-                              input format: org.apache.hadoop.mapred.TextInputFormat
-                              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                              name: default.dest2
+    Stats-Aggr Operator
 
 PREHOOK: query: FROM (select key, val from T2 where key = 8) x
 INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key
diff --git a/ql/src/test/results/clientpositive/spark/input12.q.out b/ql/src/test/results/clientpositive/spark/input12.q.out
index 4b0cf44..cd7cab4 100644
--- a/ql/src/test/results/clientpositive/spark/input12.q.out
+++ b/ql/src/test/results/clientpositive/spark/input12.q.out
@@ -40,40 +40,23 @@ INSERT OVERWRITE TABLE dest3 PARTITION(ds='2008-04-08', hr='12') SELECT src.key
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-3 is a root stage
-  Stage-5 depends on stages: Stage-3
-  Stage-4 depends on stages: Stage-5, Stage-6, Stage-7
+  Stage-4 depends on stages: Stage-3
   Stage-0 depends on stages: Stage-4
-  Stage-8 depends on stages: Stage-0
+  Stage-5 depends on stages: Stage-0
   Stage-1 depends on stages: Stage-4
-  Stage-9 depends on stages: Stage-1
+  Stage-6 depends on stages: Stage-1
   Stage-2 depends on stages: Stage-4
-  Stage-10 depends on stages: Stage-2
-  Stage-6 depends on stages: Stage-3
-  Stage-7 depends on stages: Stage-3
+  Stage-7 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-3
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 4
-            Map Operator Tree:
-                TableScan
-                  alias: src
-                  File Output Operator
-                    compressed: false
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-5
-    Spark
-#### A masked pattern was here ####
-      Vertices:
         Map 1
            Map Operator Tree:
                TableScan
+                  alias: src
                   Filter Operator
                     predicate: (key < 100) (type: boolean)
                     Select Operator
@@ -86,6 +69,30 @@ STAGE PLANS:
                           output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                           serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                           name: default.dest1
+                  Filter Operator
+                    predicate: ((key >= 100) and (key < 200)) (type: boolean)
+                    Select Operator
+                      expressions: UDFToInteger(key) (type: int), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      File Output Operator
+                        compressed: false
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                            name: default.dest2
+                  Filter Operator
+                    predicate: (key >= 200) (type: boolean)
+                    Select Operator
+                      expressions: UDFToInteger(key) (type: int)
+                      outputColumnNames: _col0
+                      File Output Operator
+                        compressed: false
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                            name: default.dest3
 
   Stage: Stage-4
     Dependency Collection
@@ -100,7 +107,7 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest1
 
-  Stage: Stage-8
+  Stage: Stage-5
     Stats-Aggr Operator
 
   Stage: Stage-1
@@ -113,7 +120,7 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest2
 
-  Stage: Stage-9
+  Stage: Stage-6
     Stats-Aggr Operator
 
   Stage: Stage-2
@@ -129,48 +136,8 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest3
 
-  Stage: Stage-10
-    Stats-Aggr Operator
-
-  Stage: Stage-6
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 2
-            Map Operator Tree:
-                TableScan
-                  Filter Operator
-                    predicate: ((key >= 100) and (key < 200)) (type: boolean)
-                    Select Operator
-                      expressions: UDFToInteger(key) (type: int), value (type: string)
-                      outputColumnNames: _col0, _col1
-                      File Output Operator
-                        compressed: false
-                        table:
-                            input format: org.apache.hadoop.mapred.TextInputFormat
-                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                            name: default.dest2
-
-  Stage: Stage-7
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 3
-            Map Operator Tree:
-                TableScan
-                  Filter Operator
-                    predicate: (key >= 200) (type: boolean)
-                    Select Operator
-                      expressions: UDFToInteger(key) (type: int)
-                      outputColumnNames: _col0
-                      File Output Operator
-                        compressed: false
-                        table:
-                            input format: org.apache.hadoop.mapred.TextInputFormat
-                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                            name: default.dest3
+    Stats-Aggr Operator
 
 PREHOOK: query: FROM src
 INSERT OVERWRITE TABLE dest1 SELECT src.* WHERE src.key < 100
diff --git a/ql/src/test/results/clientpositive/spark/input13.q.out b/ql/src/test/results/clientpositive/spark/input13.q.out
index 260a65a..670effe 100644
--- a/ql/src/test/results/clientpositive/spark/input13.q.out
+++ b/ql/src/test/results/clientpositive/spark/input13.q.out
@@ -38,42 +38,24 @@ INSERT OVERWRITE DIRECTORY 'target/warehouse/dest4.out' SELECT src.value WHERE s
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-4 is a root stage
-  Stage-6 depends on stages: Stage-4
-  Stage-5 depends on stages: Stage-6, Stage-7, Stage-8, Stage-9
+  Stage-5 depends on stages: Stage-4
   Stage-0 depends on stages: Stage-5
-  Stage-10 depends on stages: Stage-0
+  Stage-6 depends on stages: Stage-0
   Stage-1 depends on stages: Stage-5
-  Stage-11 depends on stages: Stage-1
+  Stage-7 depends on stages: Stage-1
   Stage-2 depends on stages: Stage-5
-  Stage-12 depends on stages: Stage-2
-  Stage-7 depends on stages: Stage-4
-  Stage-8 depends on stages: Stage-4
-  Stage-9 depends on stages: Stage-4
-  Stage-3 depends on stages: Stage-9
+  Stage-8 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-4
 
 STAGE PLANS:
   Stage: Stage-4
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 5
-            Map Operator Tree:
-                TableScan
-                  alias: src
-                  File Output Operator
-                    compressed: false
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-6
-    Spark
-#### A masked pattern was here ####
-      Vertices:
         Map 1
            Map Operator Tree:
                TableScan
+                  alias: src
                   Filter Operator
                     predicate: (key < 100) (type: boolean)
                     Select Operator
@@ -86,6 +68,41 @@ STAGE PLANS:
                           output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                           serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                           name: default.dest1
+                  Filter Operator
+                    predicate: ((key >= 100) and (key < 200)) (type: boolean)
+                    Select Operator
+                      expressions: UDFToInteger(key) (type: int), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      File Output Operator
+                        compressed: false
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                            name: default.dest2
+                  Filter Operator
+                    predicate: ((key >= 200) and (key < 300)) (type: boolean)
+                    Select Operator
+                      expressions: UDFToInteger(key) (type: int)
+                      outputColumnNames: _col0
+                      File Output Operator
+                        compressed: false
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                            name: default.dest3
+                  Filter Operator
+                    predicate: (key >= 300) (type: boolean)
+                    Select Operator
+                      expressions: value (type: string)
+                      outputColumnNames: _col0
+                      File Output Operator
+                        compressed: false
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-5
     Dependency Collection
@@ -100,7 +117,7 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest1
 
-  Stage: Stage-10
+  Stage: Stage-6
     Stats-Aggr Operator
 
   Stage: Stage-1
@@ -113,7 +130,7 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest2
 
-  Stage: Stage-11
+  Stage: Stage-7
     Stats-Aggr Operator
 
   Stage: Stage-2
@@ -129,67 +146,8 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest3
 
-  Stage: Stage-12
-    Stats-Aggr Operator
-
-  Stage: Stage-7
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 2
-            Map Operator Tree:
-                TableScan
-                  Filter Operator
-                    predicate: ((key >= 100) and (key < 200)) (type: boolean)
-                    Select Operator
-                      expressions: UDFToInteger(key) (type: int), value (type: string)
-                      outputColumnNames: _col0, _col1
-                      File Output Operator
-                        compressed: false
-                        table:
-                            input format: org.apache.hadoop.mapred.TextInputFormat
-                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                            name: default.dest2
-
-  Stage: Stage-8
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 3
-            Map Operator Tree:
-                TableScan
-                  Filter Operator
-                    predicate: ((key >= 200) and (key < 300)) (type: boolean)
-                    Select Operator
-                      expressions: UDFToInteger(key) (type: int)
-                      outputColumnNames: _col0
-                      File Output Operator
-                        compressed: false
-                        table:
-                            input format: org.apache.hadoop.mapred.TextInputFormat
-                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                            name: default.dest3
-
-  Stage: Stage-9
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 4
-            Map Operator Tree:
-                TableScan
-                  Filter Operator
-                    predicate: (key >= 300) (type: boolean)
-                    Select Operator
-                      expressions: value (type: string)
-                      outputColumnNames: _col0
-                      File Output Operator
-                        compressed: false
-                        table:
-                            input format: org.apache.hadoop.mapred.TextInputFormat
-                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+    Stats-Aggr Operator
 
   Stage: Stage-3
     Move Operator
diff --git a/ql/src/test/results/clientpositive/spark/input1_limit.q.out b/ql/src/test/results/clientpositive/spark/input1_limit.q.out
index 1f3b484..795d40d 100644
--- a/ql/src/test/results/clientpositive/spark/input1_limit.q.out
+++ b/ql/src/test/results/clientpositive/spark/input1_limit.q.out
@@ -30,39 +30,24 @@ INSERT OVERWRITE TABLE dest2 SELECT src.key, src.value WHERE src.key < 100 LIMIT
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
-  Stage-4 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-4, Stage-5
+  Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
-  Stage-6 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-0
   Stage-1 depends on stages: Stage-3
-  Stage-7 depends on stages: Stage-1
-  Stage-5 depends on stages: Stage-2
+  Stage-5 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-2
     Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 3
-            Map Operator Tree:
-                TableScan
-                  alias: src
-                  File Output Operator
-                    compressed: false
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-4
-    Spark
       Edges:
-        Reducer 4 <- Map 1 (GROUP, 1)
+        Reducer 2 <- Map 1 (GROUP, 1)
+        Reducer 3 <- Map 1 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1
            Map Operator Tree:
                TableScan
+                  alias: src
                   Filter Operator
                     predicate: (key < 100) (type: boolean)
                     Select Operator
@@ -73,7 +58,17 @@ STAGE PLANS:
                       Reduce Output Operator
                         sort order: 
                         value expressions: _col0 (type: string), _col1 (type: string)
-        Reducer 4
+                  Filter Operator
+                    predicate: (key < 100) (type: boolean)
+                    Select Operator
+                      expressions: key (type: string), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Limit
+                        Number of rows: 5
+                        Reduce Output Operator
+                          sort order: 
+                          value expressions: _col0 (type: string), _col1 (type: string)
+        Reducer 2
            Reduce Operator Tree:
              Select Operator
                expressions: VALUE._col0 (type: string), VALUE._col1 (type: string)
@@ -90,6 +85,23 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest1
+        Reducer 3
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: string), VALUE._col1 (type: string)
+                outputColumnNames: _col0, _col1
+                Limit
+                  Number of rows: 5
+                  Select Operator
+                    expressions: UDFToInteger(_col0) (type: int), _col1 (type: string)
+                    outputColumnNames: _col0, _col1
+                    File Output Operator
+                      compressed: false
+                      table:
+                          input format: org.apache.hadoop.mapred.TextInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                          name: default.dest2
 
   Stage: Stage-3
     Dependency Collection
@@ -104,7 +116,7 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest1
 
-  Stage: Stage-6
+  Stage: Stage-4
     Stats-Aggr Operator
 
   Stage: Stage-1
@@ -117,45 +129,8 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest2
 
-  Stage: Stage-7
-    Stats-Aggr Operator
-
   Stage: Stage-5
-    Spark
-      Edges:
-        Reducer 5 <- Map 2 (GROUP, 1)
-#### A masked pattern was here ####
-      Vertices:
-        Map 2
-            Map Operator Tree:
-                TableScan
-                  Filter Operator
-                    predicate: (key < 100) (type: boolean)
-                    Select Operator
-                      expressions: key (type: string), value (type: string)
-                      outputColumnNames: _col0, _col1
-                      Limit
-                        Number of rows: 5
-                        Reduce Output Operator
-                          sort order: 
-                          value expressions: _col0 (type: string), _col1 (type: string)
-        Reducer 5
-            Reduce Operator Tree:
-              Select Operator
-                expressions: VALUE._col0 (type: string), VALUE._col1 (type: string)
-                outputColumnNames: _col0, _col1
-                Limit
-                  Number of rows: 5
-                  Select Operator
-                    expressions: UDFToInteger(_col0) (type: int), _col1 (type: string)
-                    outputColumnNames: _col0, _col1
-                    File Output Operator
-                      compressed: false
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.dest2
+    Stats-Aggr Operator
 
 PREHOOK: query: FROM src
 INSERT OVERWRITE TABLE dest1 SELECT src.key, src.value WHERE src.key < 100 LIMIT 10
diff --git a/ql/src/test/results/clientpositive/spark/input_part2.q.out b/ql/src/test/results/clientpositive/spark/input_part2.q.out
index f2f3a2d..c39485e 100644
--- a/ql/src/test/results/clientpositive/spark/input_part2.q.out
+++ b/ql/src/test/results/clientpositive/spark/input_part2.q.out
@@ -129,41 +129,84 @@ TOK_QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
-  Stage-4 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-4, Stage-5
+  Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
-  Stage-6 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-0
   Stage-1 depends on stages: Stage-3
-  Stage-7 depends on stages: Stage-1
-  Stage-5 depends on stages: Stage-2
+  Stage-5 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-2
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 3
+        Map 1
            Map Operator Tree:
                TableScan
                  alias: srcpart
                  GatherStats: false
-                  File Output Operator
-                    compressed: false
-                    GlobalTableId: 0
-#### A masked pattern was here ####
-                    NumFilesPerFileSink: 1
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        properties:
-                          columns key,value,ds,hr,BLOCK__OFFSET__INSIDE__FILE,INPUT__FILE__NAME,ROW__ID
-                          columns.types string,string,string,string,bigint,string,struct
-                          escape.delim \
-                          serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-                    TotalFiles: 1
-                    GatherStats: false
-                    MultiFileSpray: false
+                  Filter Operator
+                    isSamplingPred: false
+                    predicate: ((key < 100) and (ds = '2008-04-08')) (type: boolean)
+                    Select Operator
+                      expressions: UDFToInteger(key) (type: int), value (type: string), hr (type: string), '2008-04-08' (type: string)
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      File Output Operator
+                        compressed: false
+                        GlobalTableId: 1
+#### A masked pattern was here ####
+                        NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            properties:
+                              bucket_count -1
+                              columns key,value,hr,ds
+                              columns.comments 
+                              columns.types int:string:string:string
+#### A masked pattern was here ####
+                              name default.dest1
+                              serialization.ddl struct dest1 { i32 key, string value, string hr, string ds}
+                              serialization.format 1
+                              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                            name: default.dest1
+                        TotalFiles: 1
+                        GatherStats: true
+                        MultiFileSpray: false
+                  Filter Operator
+                    isSamplingPred: false
+                    predicate: ((key < 100) and (ds = '2008-04-09')) (type: boolean)
+                    Select Operator
+                      expressions: UDFToInteger(key) (type: int), value (type: string), hr (type: string), '2008-04-09' (type: string)
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      File Output Operator
+                        compressed: false
+                        GlobalTableId: 2
+#### A masked pattern was here ####
+                        NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            properties:
+                              bucket_count -1
+                              columns key,value,hr,ds
+                              columns.comments 
+                              columns.types int:string:string:string
+#### A masked pattern was here ####
+                              name default.dest2
+                              serialization.ddl struct dest2 { i32 key, string value, string hr, string ds}
+                              serialization.format 1
+                              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                            name: default.dest2
+                        TotalFiles: 1
+                        GatherStats: true
+                        MultiFileSpray: false
       Path -> Alias:
 #### A masked pattern was here ####
       Path -> Partition:
@@ -263,71 +306,6 @@ STAGE PLANS:
             /srcpart/ds=2008-04-08/hr=12 [srcpart]
             /srcpart/ds=2008-04-09/hr=12 [srcpart]
 
-  Stage: Stage-4
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 1
-            Map Operator Tree:
-                TableScan
-                  GatherStats: false
-                  Filter Operator
-                    isSamplingPred: false
-                    predicate: ((key < 100) and (ds = '2008-04-08')) (type: boolean)
-                    Select Operator
-                      expressions: UDFToInteger(key) (type: int), value (type: string), hr (type: string), '2008-04-08' (type: string)
-                      outputColumnNames: _col0, _col1, _col2, _col3
-                      File Output Operator
-                        compressed: false
-                        GlobalTableId: 1
-#### A masked pattern was here ####
-                        NumFilesPerFileSink: 1
-#### A masked pattern was here ####
-                        table:
-                            input format: org.apache.hadoop.mapred.TextInputFormat
-                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                            properties:
-                              bucket_count -1
-                              columns key,value,hr,ds
-                              columns.comments 
-                              columns.types int:string:string:string
-#### A masked pattern was here ####
-                              name default.dest1
-                              serialization.ddl struct dest1 { i32 key, string value, string hr, string ds}
-                              serialization.format 1
-                              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
-                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                            name: default.dest1
-                        TotalFiles: 1
-                        GatherStats: true
-                        MultiFileSpray: false
-      Path -> Alias:
-#### A masked pattern was here ####
-      Path -> Partition:
-#### A masked pattern was here ####
-          Partition
-            base file name: -mr-10002
-            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-            properties:
-              columns key,value,ds,hr,BLOCK__OFFSET__INSIDE__FILE,INPUT__FILE__NAME,ROW__ID
-              columns.types string,string,string,string,bigint,string,struct
-              escape.delim \
-              serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-              properties:
-                columns key,value,ds,hr,BLOCK__OFFSET__INSIDE__FILE,INPUT__FILE__NAME,ROW__ID
-                columns.types string,string,string,string,bigint,string,struct
-                escape.delim \
-                serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-      Truncated Path -> Alias:
-#### A masked pattern was here ####
-
   Stage: Stage-3
     Dependency Collection
 
@@ -353,7 +331,7 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest1
 
-  Stage: Stage-6
+  Stage: Stage-4
     Stats-Aggr Operator
 #### A masked pattern was here ####
 
@@ -379,73 +357,8 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest2
 
-  Stage: Stage-7
-    Stats-Aggr Operator
-#### A masked pattern was here ####
-
   Stage: Stage-5
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 2
-            Map Operator Tree:
-                TableScan
-                  GatherStats: false
-                  Filter Operator
-                    isSamplingPred: false
-                    predicate: ((key < 100) and (ds = '2008-04-09')) (type: boolean)
-                    Select Operator
-                      expressions: UDFToInteger(key) (type: int), value (type: string), hr (type: string), '2008-04-09' (type: string)
-                      outputColumnNames: _col0, _col1, _col2, _col3
-                      File Output Operator
-                        compressed: false
-                        GlobalTableId: 2
-#### A masked pattern was here ####
-                        NumFilesPerFileSink: 1
-#### A masked pattern was here ####
-                        table:
-                            input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,hr,ds - columns.comments - columns.types int:string:string:string -#### A masked pattern was here #### - name default.dest2 - serialization.ddl struct dest2 { i32 key, string value, string hr, string ds} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns key,value,ds,hr,BLOCK__OFFSET__INSIDE__FILE,INPUT__FILE__NAME,ROW__ID - columns.types string,string,string,string,bigint,string,struct - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns key,value,ds,hr,BLOCK__OFFSET__INSIDE__FILE,INPUT__FILE__NAME,ROW__ID - columns.types string,string,string,string,bigint,string,struct - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Truncated Path -> Alias: + Stats-Aggr Operator #### A masked pattern was here #### PREHOOK: query: FROM srcpart diff --git a/ql/src/test/results/clientpositive/spark/insert1.q.out b/ql/src/test/results/clientpositive/spark/insert1.q.out index 65032cb..28e20f5 100644 --- a/ql/src/test/results/clientpositive/spark/insert1.q.out +++ b/ql/src/test/results/clientpositive/spark/insert1.q.out @@ -249,37 +249,21 @@ insert overwrite table x.insert1 select * where key > 10 and key < 20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-4 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-4, Stage-5 + Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 - Stage-6 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0 Stage-1 depends on stages: Stage-3 - Stage-7 depends on stages: Stage-1 - Stage-5 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark #### A masked pattern was here #### Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: insert2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-4 - Spark -#### A masked pattern was here #### - Vertices: Map 1 Map Operator Tree: TableScan + alias: insert2 Filter Operator predicate: (key < 10) (type: boolean) Select Operator @@ -292,6 +276,18 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert1 + Filter Operator + predicate: ((key > 10) and (key < 20)) (type: boolean) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: 
_col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: x.insert1 Stage: Stage-3 Dependency Collection @@ -306,7 +302,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert1 - Stage: Stage-6 + Stage: Stage-4 Stats-Aggr Operator Stage: Stage-1 @@ -319,28 +315,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: x.insert1 - Stage: Stage-7 - Stats-Aggr Operator - Stage: Stage-5 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - Filter Operator - predicate: ((key > 10) and (key < 20)) (type: boolean) - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: x.insert1 + Stats-Aggr Operator PREHOOK: query: -- HIVE-3676 CREATE DATABASE db2 diff --git a/ql/src/test/results/clientpositive/spark/insert_into3.q.out b/ql/src/test/results/clientpositive/spark/insert_into3.q.out index 5318a8b..ea064a0 100644 --- a/ql/src/test/results/clientpositive/spark/insert_into3.q.out +++ b/ql/src/test/results/clientpositive/spark/insert_into3.q.out @@ -30,46 +30,37 @@ POSTHOOK: query: EXPLAIN FROM src INSERT INTO TABLE insert_into3a SELECT * ORDER POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-4 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-4, Stage-5 + Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 - Stage-6 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0 Stage-1 depends on stages: Stage-3 - Stage-7 depends on stages: Stage-1 - Stage-5 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: src - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-4 - Spark Edges: - Reducer 4 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 3 <- Map 1 (GROUP SORT, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: src Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Reducer 4 + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Reducer 2 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) @@ -86,6 +77,23 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into3a + Reducer 3 + Reduce Operator Tree: + Select Operator + 
expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Limit + Number of rows: 100 + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.insert_into3b Stage: Stage-3 Dependency Collection @@ -100,7 +108,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into3a - Stage: Stage-6 + Stage: Stage-4 Stats-Aggr Operator Stage: Stage-1 @@ -113,41 +121,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into3b - Stage: Stage-7 - Stats-Aggr Operator - Stage: Stage-5 - Spark - Edges: - Reducer 5 <- Map 2 (GROUP SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Reducer 5 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Limit - Number of rows: 100 - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.insert_into3b + Stats-Aggr Operator PREHOOK: query: FROM src INSERT INTO TABLE insert_into3a SELECT * ORDER BY key, value LIMIT 50 INSERT INTO TABLE insert_into3b SELECT * ORDER BY key, value LIMIT 100 @@ -201,39 +176,24 @@ POSTHOOK: query: EXPLAIN FROM src INSERT OVERWRITE TABLE insert_into3a SELECT * POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-4 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-4, Stage-5 + Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 - Stage-6 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0 Stage-1 depends on stages: Stage-3 - Stage-7 depends on stages: Stage-1 - Stage-5 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: src - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-4 - Spark Edges: - Reducer 4 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: src Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 @@ -242,7 +202,15 @@ STAGE PLANS: Reduce Output Operator sort order: value expressions: _col0 (type: string), _col1 (type: string) - Reducer 4 + Select Operator + expressions: key (type: string), value 
(type: string) + outputColumnNames: _col0, _col1 + Limit + Number of rows: 10 + Reduce Output Operator + sort order: + value expressions: _col0 (type: string), _col1 (type: string) + Reducer 2 Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) @@ -259,6 +227,23 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into3a + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1 + Limit + Number of rows: 10 + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.insert_into3b Stage: Stage-3 Dependency Collection @@ -273,7 +258,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into3a - Stage: Stage-6 + Stage: Stage-4 Stats-Aggr Operator Stage: Stage-1 @@ -286,43 +271,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into3b - Stage: Stage-7 - Stats-Aggr Operator - Stage: Stage-5 - Spark - Edges: - Reducer 5 <- Map 2 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Limit - Number of rows: 10 - Reduce Output Operator - sort order: - value expressions: _col0 (type: string), _col1 (type: string) - Reducer 5 - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1 - Limit - Number of rows: 10 - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.insert_into3b + Stats-Aggr Operator PREHOOK: query: FROM src INSERT OVERWRITE TABLE insert_into3a SELECT * LIMIT 10 INSERT INTO TABLE insert_into3b SELECT * LIMIT 10 diff --git a/ql/src/test/results/clientpositive/spark/load_dyn_part1.q.out b/ql/src/test/results/clientpositive/spark/load_dyn_part1.q.out index 3b669fc..2b2b08d 100644 --- a/ql/src/test/results/clientpositive/spark/load_dyn_part1.q.out +++ b/ql/src/test/results/clientpositive/spark/load_dyn_part1.q.out @@ -58,37 +58,21 @@ insert overwrite table nzhang_part2 partition(ds='2008-12-31', hr) select key, v POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-4 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-4, Stage-5 + Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 - Stage-6 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0 Stage-1 depends on stages: Stage-3 - Stage-7 depends on stages: Stage-1 - Stage-5 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark #### A masked pattern was here #### Vertices: - Map 3 - Map Operator Tree: - 
TableScan - alias: srcpart - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-4 - Spark -#### A masked pattern was here #### - Vertices: Map 1 Map Operator Tree: TableScan + alias: srcpart Filter Operator predicate: (ds <= '2008-04-08') (type: boolean) Select Operator @@ -101,6 +85,18 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part1 + Filter Operator + predicate: (ds > '2008-04-08') (type: boolean) + Select Operator + expressions: key (type: string), value (type: string), hr (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part2 Stage: Stage-3 Dependency Collection @@ -118,7 +114,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part1 - Stage: Stage-6 + Stage: Stage-4 Stats-Aggr Operator Stage: Stage-1 @@ -134,28 +130,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part2 - Stage: Stage-7 - Stats-Aggr Operator - Stage: Stage-5 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - Filter Operator - predicate: (ds > '2008-04-08') (type: boolean) - Select Operator - expressions: key (type: string), value (type: string), hr (type: string) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part2 + Stats-Aggr Operator PREHOOK: query: from srcpart insert overwrite table nzhang_part1 partition (ds, hr) select key, value, ds, hr where ds <= '2008-04-08' diff --git a/ql/src/test/results/clientpositive/spark/load_dyn_part8.q.out b/ql/src/test/results/clientpositive/spark/load_dyn_part8.q.out index 50c052d..aaa1791 100644 --- a/ql/src/test/results/clientpositive/spark/load_dyn_part8.q.out +++ b/ql/src/test/results/clientpositive/spark/load_dyn_part8.q.out @@ -113,41 +113,89 @@ TOK_QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-4 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-4, Stage-5 + Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 - Stage-6 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0 Stage-1 depends on stages: Stage-3 - Stage-7 depends on stages: Stage-1 - Stage-5 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: srcpart GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 + Filter Operator + isSamplingPred: false + predicate: (ds <= '2008-04-08') (type: boolean) + Select Operator + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: 
false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.nzhang_part8 + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct nzhang_part8 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part8 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Filter Operator + isSamplingPred: false + predicate: (ds > '2008-04-08') (type: boolean) + Select Operator + expressions: key (type: string), value (type: string), hr (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 2 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns key,value,ds,hr,BLOCK__OFFSET__INSIDE__FILE,INPUT__FILE__NAME,ROW__ID - columns.types string,string,string,string,bigint,string,struct - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + NumFilesPerFileSink: 1 + Static Partition Specification: ds=2008-12-31/ +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.nzhang_part8 + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct nzhang_part8 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part8 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -341,73 +389,6 @@ STAGE PLANS: /srcpart/ds=2008-04-09/hr=11 [srcpart] /srcpart/ds=2008-04-09/hr=12 [srcpart] - Stage: Stage-4 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (ds <= '2008-04-08') (type: boolean) - Select Operator - expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments defaultdefault - 
columns.types string:string -#### A masked pattern was here #### - name default.nzhang_part8 - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct nzhang_part8 { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part8 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns key,value,ds,hr,BLOCK__OFFSET__INSIDE__FILE,INPUT__FILE__NAME,ROW__ID - columns.types string,string,string,string,bigint,string,struct - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns key,value,ds,hr,BLOCK__OFFSET__INSIDE__FILE,INPUT__FILE__NAME,ROW__ID - columns.types string,string,string,string,bigint,string,struct - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Truncated Path -> Alias: -#### A masked pattern was here #### - Stage: Stage-3 Dependency Collection @@ -438,7 +419,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part8 - Stage: Stage-6 + Stage: Stage-4 Stats-Aggr Operator #### A masked pattern was here #### @@ -469,76 +450,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part8 - Stage: Stage-7 - Stats-Aggr Operator -#### A masked pattern was here #### - Stage: Stage-5 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (ds > '2008-04-08') (type: boolean) - Select Operator - expressions: key (type: string), value (type: string), hr (type: string) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - GlobalTableId: 2 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Static Partition Specification: ds=2008-12-31/ -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments defaultdefault - columns.types string:string -#### A masked pattern was here #### - name default.nzhang_part8 - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct nzhang_part8 { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part8 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - 
input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns key,value,ds,hr,BLOCK__OFFSET__INSIDE__FILE,INPUT__FILE__NAME,ROW__ID - columns.types string,string,string,string,bigint,string,struct - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns key,value,ds,hr,BLOCK__OFFSET__INSIDE__FILE,INPUT__FILE__NAME,ROW__ID - columns.types string,string,string,string,bigint,string,struct - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Truncated Path -> Alias: + Stats-Aggr Operator #### A masked pattern was here #### PREHOOK: query: from srcpart diff --git a/ql/src/test/results/clientpositive/spark/multi_insert.q.out b/ql/src/test/results/clientpositive/spark/multi_insert.q.out index bae325f..d130dc6 100644 --- a/ql/src/test/results/clientpositive/spark/multi_insert.q.out +++ b/ql/src/test/results/clientpositive/spark/multi_insert.q.out @@ -30,37 +30,21 @@ insert overwrite table src_multi2 select * where key > 10 and key < 20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-4 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-4, Stage-5 + Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 - Stage-6 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0 Stage-1 depends on stages: Stage-3 - Stage-7 depends on stages: Stage-1 - Stage-5 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark #### A masked pattern was here #### Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: src - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-4 - Spark -#### A masked pattern was here #### - Vertices: Map 1 Map Operator Tree: TableScan + alias: src Filter Operator predicate: (key < 10) (type: boolean) Select Operator @@ -73,6 +57,18 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Filter Operator + predicate: ((key > 10) and (key < 20)) (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 Stage: Stage-3 Dependency Collection @@ -87,7 +83,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 - Stage: Stage-6 + Stage: Stage-4 Stats-Aggr Operator Stage: Stage-1 @@ -100,28 +96,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Stage: Stage-7 - Stats-Aggr Operator - Stage: Stage-5 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map 
Operator Tree: - TableScan - Filter Operator - predicate: ((key > 10) and (key < 20)) (type: boolean) - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + Stats-Aggr Operator PREHOOK: query: from src insert overwrite table src_multi1 select * where key < 10 @@ -190,37 +166,21 @@ insert overwrite table src_multi2 select * where key > 10 and key < 20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-4 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-4, Stage-5 + Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 - Stage-6 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0 Stage-1 depends on stages: Stage-3 - Stage-7 depends on stages: Stage-1 - Stage-5 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark #### A masked pattern was here #### Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: src - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-4 - Spark -#### A masked pattern was here #### - Vertices: Map 1 Map Operator Tree: TableScan + alias: src Filter Operator predicate: (key < 10) (type: boolean) Select Operator @@ -233,6 +193,18 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Filter Operator + predicate: ((key > 10) and (key < 20)) (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 Stage: Stage-3 Dependency Collection @@ -247,7 +219,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 - Stage: Stage-6 + Stage: Stage-4 Stats-Aggr Operator Stage: Stage-1 @@ -260,28 +232,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Stage: Stage-7 - Stats-Aggr Operator - Stage: Stage-5 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - Filter Operator - predicate: ((key > 10) and (key < 20)) (type: boolean) - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + Stats-Aggr Operator PREHOOK: query: from src insert overwrite table src_multi1 select * where key < 10 @@ -350,37 +302,21 @@ insert overwrite table src_multi2 select * where key > 10 and key < 20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a 
root stage - Stage-4 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-4, Stage-5 + Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 - Stage-6 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0 Stage-1 depends on stages: Stage-3 - Stage-7 depends on stages: Stage-1 - Stage-5 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark #### A masked pattern was here #### Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: src - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-4 - Spark -#### A masked pattern was here #### - Vertices: Map 1 Map Operator Tree: TableScan + alias: src Filter Operator predicate: (key < 10) (type: boolean) Select Operator @@ -393,6 +329,18 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Filter Operator + predicate: ((key > 10) and (key < 20)) (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 Stage: Stage-3 Dependency Collection @@ -407,7 +355,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 - Stage: Stage-6 + Stage: Stage-4 Stats-Aggr Operator Stage: Stage-1 @@ -420,28 +368,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Stage: Stage-7 - Stats-Aggr Operator - Stage: Stage-5 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - Filter Operator - predicate: ((key > 10) and (key < 20)) (type: boolean) - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + Stats-Aggr Operator PREHOOK: query: from src insert overwrite table src_multi1 select * where key < 10 @@ -510,37 +438,21 @@ insert overwrite table src_multi2 select * where key > 10 and key < 20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-4 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-4, Stage-5 + Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 - Stage-6 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0 Stage-1 depends on stages: Stage-3 - Stage-7 depends on stages: Stage-1 - Stage-5 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark #### A masked pattern was here #### Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: src - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-4 - Spark -#### A masked pattern was here #### - Vertices: Map 1 Map Operator Tree: TableScan + alias: src Filter Operator predicate: (key < 10) (type: boolean) Select Operator @@ -553,6 +465,18 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Filter Operator + predicate: ((key > 10) and (key < 20)) (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 Stage: Stage-3 Dependency Collection @@ -567,7 +491,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 - Stage: Stage-6 + Stage: Stage-4 Stats-Aggr Operator Stage: Stage-1 @@ -580,28 +504,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Stage: Stage-7 - Stats-Aggr Operator - Stage: Stage-5 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - Filter Operator - predicate: ((key > 10) and (key < 20)) (type: boolean) - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + Stats-Aggr Operator PREHOOK: query: from src insert overwrite table src_multi1 select * where key < 10 @@ -1274,69 +1178,83 @@ insert overwrite table src_multi2 select * where key > 10 and key < 20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-4 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-4, Stage-5 + Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 - Stage-6 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0 Stage-1 depends on stages: Stage-3 - Stage-7 depends on stages: Stage-1 - Stage-5 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark Edges: - Union 4 <- Map 3 (NONE, 0), Map 5 (NONE, 0) + Union 2 <- Map 1 (NONE, 0), Map 3 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: src Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Map 5 + Filter Operator + predicate: (_col0 < 10) (type: boolean) + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 + Filter Operator + 
predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 + Map 3 Map Operator Tree: TableScan alias: src Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Union 4 - Vertex: Union 4 - - Stage: Stage-4 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - Filter Operator - predicate: (_col0 < 10) (type: boolean) - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + Filter Operator + predicate: (_col0 < 10) (type: boolean) + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 + Filter Operator + predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 + Union 2 + Vertex: Union 2 Stage: Stage-3 Dependency Collection @@ -1351,7 +1269,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 - Stage: Stage-6 + Stage: Stage-4 Stats-Aggr Operator Stage: Stage-1 @@ -1364,28 +1282,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Stage: Stage-7 - Stats-Aggr Operator - Stage: Stage-5 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - Filter Operator - predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + Stats-Aggr Operator PREHOOK: query: from (select * from src union all select * from src) s insert overwrite table src_multi1 select * where key < 10 @@ -1473,69 +1371,83 @@ insert overwrite table src_multi2 select * where key > 
10 and key < 20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-4 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-4, Stage-5 + Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 - Stage-6 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0 Stage-1 depends on stages: Stage-3 - Stage-7 depends on stages: Stage-1 - Stage-5 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark Edges: - Union 4 <- Map 3 (NONE, 0), Map 5 (NONE, 0) + Union 2 <- Map 1 (NONE, 0), Map 3 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: src Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Map 5 + Filter Operator + predicate: (_col0 < 10) (type: boolean) + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 + Filter Operator + predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 + Map 3 Map Operator Tree: TableScan alias: src Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Union 4 - Vertex: Union 4 - - Stage: Stage-4 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - Filter Operator - predicate: (_col0 < 10) (type: boolean) - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + Filter Operator + predicate: (_col0 < 10) (type: boolean) + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 + Filter Operator + predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) + Select Operator + expressions: _col0 (type: string), _col1 (type: string) 
+ outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 + Union 2 + Vertex: Union 2 Stage: Stage-3 Dependency Collection @@ -1550,7 +1462,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 - Stage: Stage-6 + Stage: Stage-4 Stats-Aggr Operator Stage: Stage-1 @@ -1563,29 +1475,9 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Stage: Stage-7 + Stage: Stage-5 Stats-Aggr Operator - Stage: Stage-5 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - Filter Operator - predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 - PREHOOK: query: from (select * from src union all select * from src) s insert overwrite table src_multi1 select * where key < 10 insert overwrite table src_multi2 select * where key > 10 and key < 20 @@ -1672,69 +1564,83 @@ insert overwrite table src_multi2 select * where key > 10 and key < 20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-4 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-4, Stage-5 + Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 - Stage-6 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0 Stage-1 depends on stages: Stage-3 - Stage-7 depends on stages: Stage-1 - Stage-5 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark Edges: - Union 4 <- Map 3 (NONE, 0), Map 5 (NONE, 0) + Union 2 <- Map 1 (NONE, 0), Map 3 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: src Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Map 5 + Filter Operator + predicate: (_col0 < 10) (type: boolean) + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 + Filter Operator + predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 + 
Map 3 Map Operator Tree: TableScan alias: src Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Union 4 - Vertex: Union 4 - - Stage: Stage-4 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - Filter Operator - predicate: (_col0 < 10) (type: boolean) - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + Filter Operator + predicate: (_col0 < 10) (type: boolean) + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 + Filter Operator + predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 + Union 2 + Vertex: Union 2 Stage: Stage-3 Dependency Collection @@ -1749,7 +1655,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 - Stage: Stage-6 + Stage: Stage-4 Stats-Aggr Operator Stage: Stage-1 @@ -1762,28 +1668,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Stage: Stage-7 - Stats-Aggr Operator - Stage: Stage-5 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - Filter Operator - predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + Stats-Aggr Operator PREHOOK: query: from (select * from src union all select * from src) s insert overwrite table src_multi1 select * where key < 10 @@ -1871,69 +1757,83 @@ insert overwrite table src_multi2 select * where key > 10 and key < 20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-4 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-4, Stage-5 + Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 - Stage-6 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0 Stage-1 depends on stages: Stage-3 - Stage-7 depends on stages: Stage-1 - Stage-5 depends on stages: Stage-2 + Stage-5 depends on 
stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark Edges: - Union 4 <- Map 3 (NONE, 0), Map 5 (NONE, 0) + Union 2 <- Map 1 (NONE, 0), Map 3 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: src Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Map 5 + Filter Operator + predicate: (_col0 < 10) (type: boolean) + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 + Filter Operator + predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 + Map 3 Map Operator Tree: TableScan alias: src Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Union 4 - Vertex: Union 4 - - Stage: Stage-4 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - Filter Operator - predicate: (_col0 < 10) (type: boolean) - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + Filter Operator + predicate: (_col0 < 10) (type: boolean) + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 + Filter Operator + predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 + Union 2 + Vertex: Union 2 Stage: Stage-3 Dependency Collection @@ -1948,7 +1848,7 @@ STAGE PLANS: serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 - Stage: Stage-6 + Stage: Stage-4 Stats-Aggr Operator Stage: Stage-1 @@ -1961,28 +1861,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Stage: Stage-7 - Stats-Aggr Operator - Stage: Stage-5 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - Filter Operator - predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + Stats-Aggr Operator PREHOOK: query: from (select * from src union all select * from src) s insert overwrite table src_multi1 select * where key < 10 @@ -2069,37 +1949,20 @@ from src POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-3 is a root stage - Stage-5 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-5, Stage-6, Stage-7 - Stage-0 depends on stages: Stage-5 - Stage-6 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-3 - Stage-2 depends on stages: Stage-7 + Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-3 + Stage-2 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 4 - Map Operator Tree: - TableScan - alias: src - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-5 - Spark -#### A masked pattern was here #### - Vertices: Map 1 Map Operator Tree: TableScan + alias: src Filter Operator predicate: (key = 0) (type: boolean) Select Operator @@ -2111,23 +1974,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-4 - Dependency Collection - - Stage: Stage-0 - Move Operator - files: - hdfs directory: false -#### A masked pattern was here #### - - Stage: Stage-6 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan Filter Operator predicate: (key = 2) (type: boolean) Select Operator @@ -2139,20 +1985,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-1 - Move Operator - files: - hdfs directory: false -#### A masked pattern was here #### - - Stage: Stage-7 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan Filter Operator predicate: (key = 4) (type: boolean) Select Operator @@ -2165,6 +1997,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-4 + Dependency Collection + + Stage: Stage-0 + Move Operator + files: + hdfs directory: false +#### A masked pattern was here #### + + Stage: Stage-1 + Move 
Operator + files: + hdfs directory: false +#### A masked pattern was here #### + Stage: Stage-2 Move Operator files: @@ -2193,37 +2040,20 @@ from src POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-3 is a root stage - Stage-5 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-5, Stage-6, Stage-7 - Stage-0 depends on stages: Stage-5 - Stage-6 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-3 - Stage-2 depends on stages: Stage-7 + Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-3 + Stage-2 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 4 - Map Operator Tree: - TableScan - alias: src - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-5 - Spark -#### A masked pattern was here #### - Vertices: Map 1 Map Operator Tree: TableScan + alias: src Filter Operator predicate: (key = 0) (type: boolean) Select Operator @@ -2235,23 +2065,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-4 - Dependency Collection - - Stage: Stage-0 - Move Operator - files: - hdfs directory: false -#### A masked pattern was here #### - - Stage: Stage-6 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan Filter Operator predicate: (key = 2) (type: boolean) Select Operator @@ -2263,20 +2076,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-1 - Move Operator - files: - hdfs directory: false -#### A masked pattern was here #### - - Stage: Stage-7 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan Filter Operator predicate: (key = 4) (type: boolean) Select Operator @@ -2289,6 +2088,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-4 + Dependency Collection + + Stage: Stage-0 + Move Operator + files: + hdfs directory: false +#### A masked pattern was here #### + + Stage: Stage-1 + Move Operator + files: + hdfs directory: false +#### A masked pattern was here #### + Stage: Stage-2 Move Operator files: @@ -2317,37 +2131,20 @@ from src POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-3 is a root stage - Stage-5 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-5, Stage-6, Stage-7 - Stage-0 depends on stages: Stage-5 - Stage-6 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-3 - Stage-2 depends on stages: Stage-7 + Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-3 + Stage-2 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 4 - Map Operator Tree: - TableScan - alias: src - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-5 - Spark -#### A masked pattern was here #### - Vertices: Map 1 Map Operator Tree: TableScan + alias: src Filter Operator predicate: (key = 0) (type: boolean) Select Operator @@ -2359,23 +2156,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-4 - Dependency Collection - - Stage: Stage-0 - Move Operator - files: - hdfs directory: false -#### A masked pattern was here #### - - Stage: Stage-6 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan Filter Operator predicate: (key = 2) (type: boolean) Select Operator @@ -2387,20 +2167,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-1 - Move Operator - files: - hdfs directory: false -#### A masked pattern was here #### - - Stage: Stage-7 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan Filter Operator predicate: (key = 4) (type: boolean) Select Operator @@ -2413,6 +2179,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-4 + Dependency Collection + + Stage: Stage-0 + Move Operator + files: + hdfs directory: false +#### A masked pattern was here #### + + Stage: Stage-1 + Move Operator + files: + hdfs directory: false +#### A masked pattern was here #### + Stage: Stage-2 Move Operator files: @@ -2441,37 +2222,20 @@ from src POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-3 is a root stage - Stage-5 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-5, Stage-6, Stage-7 - Stage-0 depends on stages: Stage-5 - Stage-6 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-3 - Stage-2 depends on stages: Stage-7 + Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-3 + Stage-2 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 4 - Map Operator Tree: - TableScan - alias: src - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-5 - Spark -#### A masked pattern was here #### - Vertices: Map 1 Map Operator Tree: TableScan + alias: src Filter Operator predicate: (key = 0) (type: boolean) Select Operator @@ -2483,23 +2247,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-4 - Dependency Collection - - Stage: Stage-0 - Move Operator - files: - hdfs directory: false -#### A masked pattern was here #### - - Stage: Stage-6 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan Filter Operator predicate: (key = 2) (type: boolean) Select Operator @@ -2511,20 +2258,6 @@ STAGE PLANS: input 
format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-1 - Move Operator - files: - hdfs directory: false -#### A masked pattern was here #### - - Stage: Stage-7 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan Filter Operator predicate: (key = 4) (type: boolean) Select Operator @@ -2537,6 +2270,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-4 + Dependency Collection + + Stage: Stage-0 + Move Operator + files: + hdfs directory: false +#### A masked pattern was here #### + + Stage: Stage-1 + Move Operator + files: + hdfs directory: false +#### A masked pattern was here #### + Stage: Stage-2 Move Operator files: diff --git a/ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out b/ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out index 280a893..a356e0a 100644 --- a/ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out +++ b/ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out @@ -1584,39 +1584,24 @@ select value, count(distinct key) group by value POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-4 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-4, Stage-5 + Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 - Stage-6 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0 Stage-1 depends on stages: Stage-3 - Stage-7 depends on stages: Stage-1 - Stage-5 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: src - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-4 - Spark Edges: - Reducer 4 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 3 <- Map 1 (GROUP SORT, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: src Select Operator expressions: key (type: string), value (type: string) outputColumnNames: key, value @@ -1629,7 +1614,19 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string) - Reducer 4 + Select Operator + expressions: value (type: string), key (type: string) + outputColumnNames: value, key + Group By Operator + aggregations: count(DISTINCT key) + keys: value (type: string), key (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0) @@ -1646,6 +1643,23 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + 
outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), UDFToDouble(_col1) (type: double) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e3 Stage: Stage-3 Dependency Collection @@ -1660,7 +1674,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 - Stage: Stage-6 + Stage: Stage-4 Stats-Aggr Operator Stage: Stage-1 @@ -1673,47 +1687,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e3 - Stage: Stage-7 - Stats-Aggr Operator - Stage: Stage-5 - Spark - Edges: - Reducer 5 <- Map 2 (GROUP SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - Select Operator - expressions: value (type: string), key (type: string) - outputColumnNames: value, key - Group By Operator - aggregations: count(DISTINCT key) - keys: value (type: string), key (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Reducer 5 - Reduce Operator Tree: - Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), UDFToDouble(_col1) (type: double) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.e3 + Stats-Aggr Operator PREHOOK: query: explain FROM (select key, cast(key as double) as keyD, value from src order by key) a @@ -1735,24 +1710,23 @@ INSERT overwrite table e3 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-3 is a root stage - Stage-5 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-5, Stage-6 + Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-4 - Stage-7 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-0 Stage-1 depends on stages: Stage-4 - Stage-8 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1 Stage-2 depends on stages: Stage-4 - Stage-9 depends on stages: Stage-2 - Stage-6 depends on stages: Stage-3 + Stage-7 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 Spark Edges: - Reducer 4 <- Map 3 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP SORT, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: src @@ -1764,36 +1738,38 @@ STAGE PLANS: sort order: ++++ Map-reduce partition columns: _col0 (type: string), _col1 (type: double), _col2 (type: string) value expressions: _col1 (type: double), _col2 (type: string) - Reducer 4 + Reducer 2 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: double), VALUE._col1 (type: string) outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-5 - Spark - Edges: - Reducer 5 <- Map 1 (GROUP SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - Select Operator - expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - Reduce Output Operator - key expressions: _col0 (type: string), _col2 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - value expressions: _col1 (type: double) - Reducer 5 + Select Operator + expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string), _col2 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: double) + Select Operator + expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Group By Operator + aggregations: count(DISTINCT _col1) + keys: _col0 (type: string), _col1 (type: double), _col2 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: _col0 (type: string), UDFToDouble(_col3) (type: double) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e3 + Reducer 3 Reduce Operator Tree: Forward Group By Operator @@ -1840,7 +1816,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 - Stage: Stage-7 + Stage: Stage-5 Stats-Aggr Operator Stage: Stage-1 @@ -1853,7 +1829,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 - Stage: Stage-8 + Stage: Stage-6 Stats-Aggr Operator Stage: Stage-2 @@ -1866,32 +1842,6 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e3 - Stage: Stage-9 + Stage: Stage-7 Stats-Aggr Operator - Stage: Stage-6 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - Select Operator - expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - Group By Operator - aggregations: count(DISTINCT _col1) - keys: _col0 (type: string), _col1 (type: double), _col2 (type: string) - mode: complete - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - expressions: _col0 (type: string), UDFToDouble(_col3) (type: double) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.e3 - diff --git a/ql/src/test/results/clientpositive/spark/multi_insert_lateral_view.q.out b/ql/src/test/results/clientpositive/spark/multi_insert_lateral_view.q.out index b07c582..767a263 100644 --- a/ql/src/test/results/clientpositive/spark/multi_insert_lateral_view.q.out +++ b/ql/src/test/results/clientpositive/spark/multi_insert_lateral_view.q.out @@ -59,37 +59,21 @@ insert overwrite table src_lv2 select key, C lateral view explode(array(key+3, k POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is 
a root stage - Stage-4 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-4, Stage-5 + Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 - Stage-6 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0 Stage-1 depends on stages: Stage-3 - Stage-7 depends on stages: Stage-1 - Stage-5 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark #### A masked pattern was here #### Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: src_10 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-4 - Spark -#### A masked pattern was here #### - Vertices: Map 1 Map Operator Tree: TableScan + alias: src_10 Lateral View Forward Select Operator expressions: key (type: string) @@ -123,43 +107,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv1 - - Stage: Stage-3 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv1 - - Stage: Stage-6 - Stats-Aggr Operator - - Stage: Stage-1 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv2 - - Stage: Stage-7 - Stats-Aggr Operator - - Stage: Stage-5 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan Lateral View Forward Select Operator expressions: key (type: string) @@ -194,6 +141,35 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv2 + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv1 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv2 + + Stage: Stage-5 + Stats-Aggr Operator + PREHOOK: query: from src_10 insert overwrite table src_lv1 select key, C lateral view explode(array(key+1, key+2)) A as C insert overwrite table src_lv2 select key, C lateral view explode(array(key+3, key+4)) A as C @@ -292,39 +268,24 @@ insert overwrite table src_lv2 select key, sum(C) lateral view explode(array(key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-4 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-4, Stage-5 + Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 - Stage-6 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0 Stage-1 depends on stages: Stage-3 - Stage-7 depends on stages: Stage-1 - 
Stage-5 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: src_10 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-4 - Spark Edges: - Reducer 4 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: src_10 Lateral View Forward Select Operator expressions: key (type: string) @@ -364,62 +325,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) value expressions: _col1 (type: double) - Reducer 4 - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: double) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv1 - - Stage: Stage-3 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv1 - - Stage: Stage-6 - Stats-Aggr Operator - - Stage: Stage-1 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv2 - - Stage: Stage-7 - Stats-Aggr Operator - - Stage: Stage-5 - Spark - Edges: - Reducer 5 <- Map 2 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan Lateral View Forward Select Operator expressions: key (type: string) @@ -459,7 +364,24 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) value expressions: _col1 (type: double) - Reducer 5 + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv1 + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -477,6 +399,35 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv2 + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv1 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv2 + + Stage: Stage-5 + Stats-Aggr Operator + PREHOOK: query: from src_10 insert overwrite table src_lv1 select key, sum(C) lateral view explode(array(key+1, key+2)) A as C group by key insert overwrite table src_lv2 select key, sum(C) lateral view explode(array(key+3, key+4)) A as C group by key @@ -557,41 +508,26 @@ insert overwrite table src_lv3 select key, count(value) where key < 200 group by POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-3 is a root stage - Stage-5 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-5, Stage-6 + Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-4 - Stage-7 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-0 Stage-1 depends on stages: Stage-4 - Stage-8 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1 Stage-2 depends on stages: Stage-4 - Stage-9 depends on stages: Stage-2 - Stage-6 depends on stages: Stage-3 + Stage-7 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: src_10 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-5 - Spark Edges: - Reducer 4 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Map 1 (GROUP SORT, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: src_10 Lateral View Forward Select Operator expressions: key (type: string) @@ -631,7 +567,17 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) value expressions: _col1 (type: double) - Reducer 4 + Filter Operator + predicate: ((key > 200) or (key < 200)) (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -648,6 +594,43 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv1 + Reducer 3 + Reduce Operator Tree: + Forward + Filter Operator + predicate: (KEY._col0 > 200) (type: boolean) + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv2 + Filter Operator + predicate: (KEY._col0 < 200) (type: boolean) + Group By Operator + 
aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv3 Stage: Stage-4 Dependency Collection @@ -662,7 +645,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv1 - Stage: Stage-7 + Stage: Stage-5 Stats-Aggr Operator Stage: Stage-1 @@ -675,7 +658,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv2 - Stage: Stage-8 + Stage: Stage-6 Stats-Aggr Operator Stage: Stage-2 @@ -688,66 +671,9 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv3 - Stage: Stage-9 + Stage: Stage-7 Stats-Aggr Operator - Stage: Stage-6 - Spark - Edges: - Reducer 5 <- Map 2 (GROUP SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - Filter Operator - predicate: ((key > 200) or (key < 200)) (type: boolean) - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - value expressions: value (type: string) - Reducer 5 - Reduce Operator Tree: - Forward - Filter Operator - predicate: (KEY._col0 > 200) (type: boolean) - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv2 - Filter Operator - predicate: (KEY._col0 < 200) (type: boolean) - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv3 - PREHOOK: query: from src_10 insert overwrite table src_lv1 select key, sum(C) lateral view explode(array(key+1, key+2)) A as C group by key insert overwrite table src_lv2 select key, count(value) where key > 200 group by key @@ -847,42 +773,27 @@ insert overwrite table src_lv3 select value, sum(distinct key) group by value POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-3 is a root stage - Stage-5 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-5, Stage-6, Stage-7 + Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-4 - Stage-8 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-0 Stage-1 depends on stages: Stage-4 - Stage-9 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1 Stage-2 depends on 
stages: Stage-4 - Stage-10 depends on stages: Stage-2 - Stage-6 depends on stages: Stage-3 - Stage-7 depends on stages: Stage-3 + Stage-7 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 Spark -#### A masked pattern was here #### - Vertices: - Map 4 - Map Operator Tree: - TableScan - alias: src_10 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-5 - Spark Edges: - Reducer 5 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 3 <- Map 1 (GROUP SORT, 1) + Reducer 4 <- Map 1 (GROUP SORT, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: src_10 Lateral View Forward Select Operator expressions: key (type: string) @@ -920,75 +831,6 @@ STAGE PLANS: key expressions: _col0 (type: double), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: double) - Reducer 5 - Reduce Operator Tree: - Group By Operator - aggregations: sum(DISTINCT KEY._col1:0._col0) - keys: KEY._col0 (type: double) - mode: mergepartial - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: double), _col1 (type: double) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv1 - - Stage: Stage-4 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv1 - - Stage: Stage-8 - Stats-Aggr Operator - - Stage: Stage-1 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv2 - - Stage: Stage-9 - Stats-Aggr Operator - - Stage: Stage-2 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv3 - - Stage: Stage-10 - Stats-Aggr Operator - - Stage: Stage-6 - Spark - Edges: - Reducer 6 <- Map 2 (GROUP SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan Lateral View Forward Select Operator expressions: key (type: string) @@ -1026,7 +868,36 @@ STAGE PLANS: key expressions: _col0 (type: double), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: double) - Reducer 6 + Select Operator + expressions: value (type: string), key (type: string) + outputColumnNames: value, key + Group By Operator + aggregations: sum(DISTINCT key) + keys: value (type: string), key (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Reducer 2 + Reduce Operator Tree: + Group By Operator + 
aggregations: sum(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv1 + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: sum(DISTINCT KEY._col1:0._col0) @@ -1043,29 +914,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv2 - - Stage: Stage-7 - Spark - Edges: - Reducer 7 <- Map 3 (GROUP SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - Select Operator - expressions: value (type: string), key (type: string) - outputColumnNames: value, key - Group By Operator - aggregations: sum(DISTINCT key) - keys: value (type: string), key (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Reducer 7 + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: sum(DISTINCT KEY._col1:0._col0) @@ -1083,6 +932,48 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv3 + Stage: Stage-4 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv1 + + Stage: Stage-5 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv2 + + Stage: Stage-6 + Stats-Aggr Operator + + Stage: Stage-2 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv3 + + Stage: Stage-7 + Stats-Aggr Operator + PREHOOK: query: from src_10 insert overwrite table src_lv1 select C, sum(distinct key) lateral view explode(array(key+1, key+2)) A as C group by C insert overwrite table src_lv2 select C, sum(distinct key) lateral view explode(array(key+3, key+4)) A as C group by C @@ -1210,44 +1101,29 @@ insert overwrite table src_lv4 select value, sum(distinct key) where key < 200 g POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-4 is a root stage - Stage-6 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-6, Stage-7, Stage-8 + Stage-5 depends on stages: Stage-4 Stage-0 depends on stages: Stage-5 - Stage-9 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-0 Stage-1 depends on stages: Stage-5 - Stage-10 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-1 Stage-2 depends on stages: Stage-5 - Stage-11 depends on stages: Stage-2 + Stage-8 depends on stages: Stage-2 Stage-3 depends on 
stages: Stage-5 - Stage-12 depends on stages: Stage-3 - Stage-7 depends on stages: Stage-4 - Stage-8 depends on stages: Stage-4 + Stage-9 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-4 Spark -#### A masked pattern was here #### - Vertices: - Map 4 - Map Operator Tree: - TableScan - alias: src_10 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-6 - Spark Edges: - Reducer 5 <- Map 1 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 3 <- Map 1 (GROUP SORT, 1) + Reducer 4 <- Map 1 (GROUP SORT, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: src_10 Lateral View Forward Select Operator expressions: key (type: string) @@ -1285,88 +1161,6 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: double) sort order: ++ Map-reduce partition columns: _col0 (type: string) - Reducer 5 - Reduce Operator Tree: - Group By Operator - aggregations: sum(DISTINCT KEY._col1:0._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: double) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv1 - - Stage: Stage-5 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv1 - - Stage: Stage-9 - Stats-Aggr Operator - - Stage: Stage-1 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv2 - - Stage: Stage-10 - Stats-Aggr Operator - - Stage: Stage-2 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv3 - - Stage: Stage-11 - Stats-Aggr Operator - - Stage: Stage-3 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv4 - - Stage: Stage-12 - Stats-Aggr Operator - - Stage: Stage-7 - Spark - Edges: - Reducer 6 <- Map 2 (GROUP SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan Lateral View Forward Select Operator expressions: key (type: string) @@ -1404,7 +1198,33 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: double) sort order: ++ Map-reduce partition columns: _col0 (type: string) - Reducer 6 + Filter Operator + predicate: ((key > 200) or (key < 200)) (type: boolean) + Select Operator + expressions: key (type: string), value (type: 
string) + outputColumnNames: key, value + Reduce Output Operator + key expressions: value (type: string), key (type: string) + sort order: ++ + Map-reduce partition columns: value (type: string) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: sum(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv1 + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: sum(DISTINCT KEY._col1:0._col0) @@ -1421,26 +1241,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv2 - - Stage: Stage-8 - Spark - Edges: - Reducer 7 <- Map 3 (GROUP SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - Filter Operator - predicate: ((key > 200) or (key < 200)) (type: boolean) - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Reduce Output Operator - key expressions: value (type: string), key (type: string) - sort order: ++ - Map-reduce partition columns: value (type: string) - Reducer 7 + Reducer 4 Reduce Operator Tree: Forward Filter Operator @@ -1478,6 +1279,61 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv4 + Stage: Stage-5 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv1 + + Stage: Stage-6 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv2 + + Stage: Stage-7 + Stats-Aggr Operator + + Stage: Stage-2 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv3 + + Stage: Stage-8 + Stats-Aggr Operator + + Stage: Stage-3 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv4 + + Stage: Stage-9 + Stats-Aggr Operator + PREHOOK: query: from src_10 insert overwrite table src_lv1 select key, sum(distinct C) lateral view explode(array(key+1, key+2)) A as C group by key insert overwrite table src_lv2 select key, sum(distinct C) lateral view explode(array(key+3, key+4)) A as C group by key diff --git a/ql/src/test/results/clientpositive/spark/multi_insert_move_tasks_share_dependencies.q.out 
b/ql/src/test/results/clientpositive/spark/multi_insert_move_tasks_share_dependencies.q.out index fd477ca..edb7475 100644 --- a/ql/src/test/results/clientpositive/spark/multi_insert_move_tasks_share_dependencies.q.out +++ b/ql/src/test/results/clientpositive/spark/multi_insert_move_tasks_share_dependencies.q.out @@ -30,37 +30,21 @@ insert overwrite table src_multi2 select * where key > 10 and key < 20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-4 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-4, Stage-5 + Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 - Stage-6 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0 Stage-1 depends on stages: Stage-3 - Stage-7 depends on stages: Stage-1 - Stage-5 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark #### A masked pattern was here #### Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: src - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-4 - Spark -#### A masked pattern was here #### - Vertices: Map 1 Map Operator Tree: TableScan + alias: src Filter Operator predicate: (key < 10) (type: boolean) Select Operator @@ -73,6 +57,18 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Filter Operator + predicate: ((key > 10) and (key < 20)) (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 Stage: Stage-3 Dependency Collection @@ -87,7 +83,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 - Stage: Stage-6 + Stage: Stage-4 Stats-Aggr Operator Stage: Stage-1 @@ -100,28 +96,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Stage: Stage-7 - Stats-Aggr Operator - Stage: Stage-5 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - Filter Operator - predicate: ((key > 10) and (key < 20)) (type: boolean) - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + Stats-Aggr Operator PREHOOK: query: from src insert overwrite table src_multi1 select * where key < 10 @@ -190,37 +166,21 @@ insert overwrite table src_multi2 select * where key > 10 and key < 20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-4 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-4, Stage-5 + Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 - Stage-6 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0 Stage-1 depends on stages: Stage-3 - 
Stage-7 depends on stages: Stage-1 - Stage-5 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark #### A masked pattern was here #### Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: src - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-4 - Spark -#### A masked pattern was here #### - Vertices: Map 1 Map Operator Tree: TableScan + alias: src Filter Operator predicate: (key < 10) (type: boolean) Select Operator @@ -233,6 +193,18 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Filter Operator + predicate: ((key > 10) and (key < 20)) (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 Stage: Stage-3 Dependency Collection @@ -247,7 +219,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 - Stage: Stage-6 + Stage: Stage-4 Stats-Aggr Operator Stage: Stage-1 @@ -260,28 +232,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Stage: Stage-7 - Stats-Aggr Operator - Stage: Stage-5 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - Filter Operator - predicate: ((key > 10) and (key < 20)) (type: boolean) - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + Stats-Aggr Operator PREHOOK: query: from src insert overwrite table src_multi1 select * where key < 10 @@ -350,37 +302,21 @@ insert overwrite table src_multi2 select * where key > 10 and key < 20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-4 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-4, Stage-5 + Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 - Stage-6 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0 Stage-1 depends on stages: Stage-3 - Stage-7 depends on stages: Stage-1 - Stage-5 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark #### A masked pattern was here #### Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: src - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-4 - Spark -#### A masked pattern was here #### - Vertices: Map 1 Map Operator Tree: TableScan + alias: src Filter Operator predicate: (key < 10) (type: boolean) Select Operator @@ -393,6 +329,18 @@ STAGE PLANS: output 
format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Filter Operator + predicate: ((key > 10) and (key < 20)) (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 Stage: Stage-3 Dependency Collection @@ -407,7 +355,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 - Stage: Stage-6 + Stage: Stage-4 Stats-Aggr Operator Stage: Stage-1 @@ -420,28 +368,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Stage: Stage-7 - Stats-Aggr Operator - Stage: Stage-5 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - Filter Operator - predicate: ((key > 10) and (key < 20)) (type: boolean) - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + Stats-Aggr Operator PREHOOK: query: from src insert overwrite table src_multi1 select * where key < 10 @@ -510,37 +438,21 @@ insert overwrite table src_multi2 select * where key > 10 and key < 20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-4 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-4, Stage-5 + Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 - Stage-6 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0 Stage-1 depends on stages: Stage-3 - Stage-7 depends on stages: Stage-1 - Stage-5 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark #### A masked pattern was here #### Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: src - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-4 - Spark -#### A masked pattern was here #### - Vertices: Map 1 Map Operator Tree: TableScan + alias: src Filter Operator predicate: (key < 10) (type: boolean) Select Operator @@ -553,6 +465,18 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Filter Operator + predicate: ((key > 10) and (key < 20)) (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 Stage: Stage-3 Dependency Collection @@ -567,7 +491,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe 
name: default.src_multi1 - Stage: Stage-6 + Stage: Stage-4 Stats-Aggr Operator Stage: Stage-1 @@ -580,28 +504,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Stage: Stage-7 - Stats-Aggr Operator - Stage: Stage-5 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - Filter Operator - predicate: ((key > 10) and (key < 20)) (type: boolean) - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + Stats-Aggr Operator PREHOOK: query: from src insert overwrite table src_multi1 select * where key < 10 @@ -1274,69 +1178,83 @@ insert overwrite table src_multi2 select * where key > 10 and key < 20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-4 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-4, Stage-5 + Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 - Stage-6 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0 Stage-1 depends on stages: Stage-3 - Stage-7 depends on stages: Stage-1 - Stage-5 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark Edges: - Union 4 <- Map 3 (NONE, 0), Map 5 (NONE, 0) + Union 2 <- Map 1 (NONE, 0), Map 3 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: src Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Map 5 + Filter Operator + predicate: (_col0 < 10) (type: boolean) + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 + Filter Operator + predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 + Map 3 Map Operator Tree: TableScan alias: src Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Union 4 - Vertex: Union 4 - - Stage: Stage-4 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - Filter Operator - predicate: 
(_col0 < 10) (type: boolean) - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + Filter Operator + predicate: (_col0 < 10) (type: boolean) + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 + Filter Operator + predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 + Union 2 + Vertex: Union 2 Stage: Stage-3 Dependency Collection @@ -1351,7 +1269,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 - Stage: Stage-6 + Stage: Stage-4 Stats-Aggr Operator Stage: Stage-1 @@ -1364,28 +1282,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Stage: Stage-7 - Stats-Aggr Operator - Stage: Stage-5 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - Filter Operator - predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + Stats-Aggr Operator PREHOOK: query: from (select * from src union all select * from src) s insert overwrite table src_multi1 select * where key < 10 @@ -1473,69 +1371,83 @@ insert overwrite table src_multi2 select * where key > 10 and key < 20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-4 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-4, Stage-5 + Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 - Stage-6 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0 Stage-1 depends on stages: Stage-3 - Stage-7 depends on stages: Stage-1 - Stage-5 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark Edges: - Union 4 <- Map 3 (NONE, 0), Map 5 (NONE, 0) + Union 2 <- Map 1 (NONE, 0), Map 3 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: src Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Map 5 + Filter Operator + predicate: (_col0 < 10) (type: boolean) + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 + Filter Operator + predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 + Map 3 Map Operator Tree: TableScan alias: src Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Union 4 - Vertex: Union 4 - - Stage: Stage-4 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - Filter Operator - predicate: (_col0 < 10) (type: boolean) - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + Filter Operator + predicate: (_col0 < 10) (type: boolean) + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 + Filter Operator + predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 + Union 2 + Vertex: Union 2 Stage: Stage-3 Dependency Collection @@ -1550,7 +1462,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 - Stage: Stage-6 + Stage: Stage-4 Stats-Aggr Operator Stage: Stage-1 @@ -1563,28 +1475,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Stage: Stage-7 - Stats-Aggr Operator - Stage: Stage-5 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - Filter Operator - predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - 
outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + Stats-Aggr Operator PREHOOK: query: from (select * from src union all select * from src) s insert overwrite table src_multi1 select * where key < 10 @@ -1672,69 +1564,83 @@ insert overwrite table src_multi2 select * where key > 10 and key < 20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-4 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-4, Stage-5 + Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 - Stage-6 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0 Stage-1 depends on stages: Stage-3 - Stage-7 depends on stages: Stage-1 - Stage-5 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark Edges: - Union 4 <- Map 3 (NONE, 0), Map 5 (NONE, 0) + Union 2 <- Map 1 (NONE, 0), Map 3 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: src Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Map 5 + Filter Operator + predicate: (_col0 < 10) (type: boolean) + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 + Filter Operator + predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 + Map 3 Map Operator Tree: TableScan alias: src Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Union 4 - Vertex: Union 4 - - Stage: Stage-4 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - Filter Operator - predicate: (_col0 < 10) (type: boolean) - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + Filter Operator + predicate: (_col0 < 10) (type: boolean) + 
Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 + Filter Operator + predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 + Union 2 + Vertex: Union 2 Stage: Stage-3 Dependency Collection @@ -1749,7 +1655,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 - Stage: Stage-6 + Stage: Stage-4 Stats-Aggr Operator Stage: Stage-1 @@ -1762,28 +1668,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Stage: Stage-7 - Stats-Aggr Operator - Stage: Stage-5 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - Filter Operator - predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + Stats-Aggr Operator PREHOOK: query: from (select * from src union all select * from src) s insert overwrite table src_multi1 select * where key < 10 @@ -1871,69 +1757,83 @@ insert overwrite table src_multi2 select * where key > 10 and key < 20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-4 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-4, Stage-5 + Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 - Stage-6 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0 Stage-1 depends on stages: Stage-3 - Stage-7 depends on stages: Stage-1 - Stage-5 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark Edges: - Union 4 <- Map 3 (NONE, 0), Map 5 (NONE, 0) + Union 2 <- Map 1 (NONE, 0), Map 3 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: src Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Map 5 + Filter Operator + predicate: (_col0 < 10) (type: boolean) + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: 
default.src_multi1 + Filter Operator + predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 + Map 3 Map Operator Tree: TableScan alias: src Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Union 4 - Vertex: Union 4 - - Stage: Stage-4 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - Filter Operator - predicate: (_col0 < 10) (type: boolean) - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + Filter Operator + predicate: (_col0 < 10) (type: boolean) + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 + Filter Operator + predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 + Union 2 + Vertex: Union 2 Stage: Stage-3 Dependency Collection @@ -1948,7 +1848,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 - Stage: Stage-6 + Stage: Stage-4 Stats-Aggr Operator Stage: Stage-1 @@ -1961,28 +1861,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Stage: Stage-7 - Stats-Aggr Operator - Stage: Stage-5 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - Filter Operator - predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + Stats-Aggr Operator PREHOOK: query: from (select * from src union all select * from src) s insert overwrite table src_multi1 select * where key < 10 @@ -2069,37 +1949,20 @@ from src 
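The union-all variants above show the heart of this change: the old plans first wrote the unioned rows to a temporary SequenceFile (Map vertices with HiveSequenceFileOutputFormat and LazyBinarySerDe) and then ran one extra Spark stage per INSERT branch, while the new plans keep both Filter/Select/File Output branches inside the same Map vertices feeding Union 2, so the whole multi-insert runs in a single Spark stage. A minimal sketch of the query shape being tested follows; the CREATE TABLE statements are illustrative assumptions, not quoted from the qtest setup, and only the FROM ... INSERT statement is taken from the query echoes above.

    -- Illustrative DDL (assumed; the qtest defines the real tables).
    CREATE TABLE src_multi1 (key STRING, value STRING);
    CREATE TABLE src_multi2 (key STRING, value STRING);

    -- Multi-insert over a UNION ALL source; after this patch EXPLAIN shows
    -- one Spark stage with Union 2 <- Map 1 (NONE, 0), Map 3 (NONE, 0).
    FROM (SELECT * FROM src UNION ALL SELECT * FROM src) s
    INSERT OVERWRITE TABLE src_multi1 SELECT * WHERE key < 10
    INSERT OVERWRITE TABLE src_multi2 SELECT * WHERE key > 10 AND key < 20;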
POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-3 is a root stage - Stage-5 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-5, Stage-6, Stage-7 - Stage-0 depends on stages: Stage-5 - Stage-6 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-3 - Stage-2 depends on stages: Stage-7 + Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-3 + Stage-2 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 4 - Map Operator Tree: - TableScan - alias: src - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-5 - Spark -#### A masked pattern was here #### - Vertices: Map 1 Map Operator Tree: TableScan + alias: src Filter Operator predicate: (key = 0) (type: boolean) Select Operator @@ -2111,23 +1974,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-4 - Dependency Collection - - Stage: Stage-0 - Move Operator - files: - hdfs directory: false -#### A masked pattern was here #### - - Stage: Stage-6 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan Filter Operator predicate: (key = 2) (type: boolean) Select Operator @@ -2139,20 +1985,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-1 - Move Operator - files: - hdfs directory: false -#### A masked pattern was here #### - - Stage: Stage-7 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan Filter Operator predicate: (key = 4) (type: boolean) Select Operator @@ -2165,6 +1997,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-4 + Dependency Collection + + Stage: Stage-0 + Move Operator + files: + hdfs directory: false +#### A masked pattern was here #### + + Stage: Stage-1 + Move Operator + files: + hdfs directory: false +#### A masked pattern was here #### + Stage: Stage-2 Move Operator files: @@ -2193,37 +2040,20 @@ from src POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-3 is a root stage - Stage-5 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-5, Stage-6, Stage-7 - Stage-0 depends on stages: Stage-5 - Stage-6 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-3 - Stage-2 depends on stages: Stage-7 + Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-3 + Stage-2 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 4 - Map Operator Tree: - TableScan - alias: src - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-5 - Spark -#### A 
masked pattern was here #### - Vertices: Map 1 Map Operator Tree: TableScan + alias: src Filter Operator predicate: (key = 0) (type: boolean) Select Operator @@ -2235,23 +2065,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-4 - Dependency Collection - - Stage: Stage-0 - Move Operator - files: - hdfs directory: false -#### A masked pattern was here #### - - Stage: Stage-6 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan Filter Operator predicate: (key = 2) (type: boolean) Select Operator @@ -2263,20 +2076,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-1 - Move Operator - files: - hdfs directory: false -#### A masked pattern was here #### - - Stage: Stage-7 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan Filter Operator predicate: (key = 4) (type: boolean) Select Operator @@ -2289,6 +2088,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-4 + Dependency Collection + + Stage: Stage-0 + Move Operator + files: + hdfs directory: false +#### A masked pattern was here #### + + Stage: Stage-1 + Move Operator + files: + hdfs directory: false +#### A masked pattern was here #### + Stage: Stage-2 Move Operator files: @@ -2317,37 +2131,20 @@ from src POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-3 is a root stage - Stage-5 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-5, Stage-6, Stage-7 - Stage-0 depends on stages: Stage-5 - Stage-6 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-3 - Stage-2 depends on stages: Stage-7 - + Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-3 + Stage-2 depends on stages: Stage-3 + STAGE PLANS: Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 4 - Map Operator Tree: - TableScan - alias: src - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-5 - Spark -#### A masked pattern was here #### - Vertices: Map 1 Map Operator Tree: TableScan + alias: src Filter Operator predicate: (key = 0) (type: boolean) Select Operator @@ -2359,23 +2156,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-4 - Dependency Collection - - Stage: Stage-0 - Move Operator - files: - hdfs directory: false -#### A masked pattern was here #### - - Stage: Stage-6 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan Filter Operator predicate: (key = 2) (type: boolean) Select Operator @@ -2387,20 +2167,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-1 - Move Operator - files: - hdfs directory: false -#### A masked pattern was here #### - - Stage: Stage-7 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan Filter Operator predicate: (key = 4) (type: boolean) Select Operator @@ -2413,6 +2179,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-4 + Dependency Collection + + Stage: Stage-0 + Move Operator + files: + hdfs directory: false +#### A masked pattern was here #### + + Stage: Stage-1 + Move Operator + files: + hdfs directory: false +#### A masked pattern was here #### + Stage: Stage-2 Move Operator files: @@ -2441,37 +2222,20 @@ from src POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-3 is a root stage - Stage-5 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-5, Stage-6, Stage-7 - Stage-0 depends on stages: Stage-5 - Stage-6 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-3 - Stage-2 depends on stages: Stage-7 + Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-3 + Stage-2 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 4 - Map Operator Tree: - TableScan - alias: src - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-5 - Spark -#### A masked pattern was here #### - Vertices: Map 1 Map Operator Tree: TableScan + alias: src Filter Operator predicate: (key = 0) (type: boolean) Select Operator @@ -2483,23 +2247,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-4 - Dependency Collection - - Stage: Stage-0 - Move Operator - files: - hdfs directory: false -#### A masked pattern was here #### - - Stage: Stage-6 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan Filter Operator predicate: (key = 2) (type: boolean) Select Operator @@ -2511,20 +2258,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-1 - Move Operator - files: - hdfs directory: false -#### A masked pattern was here #### - - Stage: Stage-7 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan Filter Operator predicate: (key = 4) (type: boolean) Select Operator @@ -2537,6 +2270,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-4 + Dependency Collection + + Stage: Stage-0 + Move Operator + files: + hdfs directory: false +#### A masked pattern was here #### + + Stage: Stage-1 + Move Operator + files: + hdfs directory: false +#### A masked pattern was here #### + Stage: Stage-2 Move Operator files: @@ -3037,40 +2785,27 @@ insert overwrite table src_multi2 select * where key > 10 and key < 20 POSTHOOK: type: QUERY 
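The key = 0 / 2 / 4 plans above apply the same consolidation to directory targets: all three Filter branches now share Map 1, and only the Move Operator stages remain separate. A hedged sketch of that shape follows; the output paths are masked in the golden files, so the ones here are hypothetical, and LOCAL is an inference from "hdfs directory: false" in the Move Operators.

    -- Hypothetical paths; the real ones appear above only as
    -- '#### A masked pattern was here ####'.
    FROM src
    INSERT OVERWRITE LOCAL DIRECTORY '/tmp/multi_0' SELECT * WHERE key = 0
    INSERT OVERWRITE LOCAL DIRECTORY '/tmp/multi_2' SELECT * WHERE key = 2
    INSERT OVERWRITE LOCAL DIRECTORY '/tmp/multi_4' SELECT * WHERE key = 4;

The plans that follow extend this to a mixed table-plus-directory multi-insert with sorted output: the consolidated Spark stage gains Reducer 2/3/4 edges (GROUP SORT, 1), and the stage count drops from eleven to eight.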
STAGE DEPENDENCIES: Stage-4 is a root stage - Stage-6 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-6, Stage-7, Stage-8 + Stage-5 depends on stages: Stage-4 Stage-0 depends on stages: Stage-5 - Stage-9 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-0 Stage-1 depends on stages: Stage-5 - Stage-10 depends on stages: Stage-1 - Stage-7 depends on stages: Stage-4 - Stage-8 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-8 - Stage-3 depends on stages: Stage-8 + Stage-7 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-4 + Stage-3 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-4 Spark -#### A masked pattern was here #### - Vertices: - Map 4 - Map Operator Tree: - TableScan - alias: src - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-6 - Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 4 <- Reducer 2 (GROUP SORT, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: src Filter Operator predicate: (key < 10) (type: boolean) Select Operator @@ -3083,43 +2818,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 - - Stage: Stage-5 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 - - Stage: Stage-9 - Stats-Aggr Operator - - Stage: Stage-1 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 - - Stage: Stage-10 - Stats-Aggr Operator - - Stage: Stage-7 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Select Operator @@ -3132,18 +2830,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - - Stage: Stage-8 - Spark - Edges: - Reducer 5 <- Map 3 (GROUP SORT, 1) - Reducer 6 <- Reducer 5 (GROUP SORT, 1) - Reducer 7 <- Reducer 5 (GROUP SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan Filter Operator predicate: ((key < 10) or ((key > 10) and (key < 20))) (type: boolean) Select Operator @@ -3153,7 +2839,7 @@ STAGE PLANS: key expressions: key (type: string), value (type: string) sort order: ++ Map-reduce partition columns: key (type: string), value (type: string) - Reducer 5 + Reducer 2 Reduce Operator Tree: Forward Filter Operator @@ -3184,7 +2870,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) value expressions: _col0 (type: string) - Reducer 6 + Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) @@ -3195,7 +2881,7 @@ STAGE PLANS: input format: 
org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 + Reducer 4 Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) @@ -3207,6 +2893,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-5 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 + + Stage: Stage-6 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 + + Stage: Stage-7 + Stats-Aggr Operator + Stage: Stage-2 Move Operator files: @@ -3294,40 +3009,27 @@ insert overwrite table src_multi2 select * where key > 10 and key < 20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-4 is a root stage - Stage-6 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-6, Stage-7, Stage-8 + Stage-5 depends on stages: Stage-4 Stage-0 depends on stages: Stage-5 - Stage-9 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-0 Stage-1 depends on stages: Stage-5 - Stage-10 depends on stages: Stage-1 - Stage-7 depends on stages: Stage-4 - Stage-8 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-8 - Stage-3 depends on stages: Stage-8 + Stage-7 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-4 + Stage-3 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-4 Spark -#### A masked pattern was here #### - Vertices: - Map 4 - Map Operator Tree: - TableScan - alias: src - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-6 - Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 4 <- Reducer 2 (GROUP SORT, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: src Filter Operator predicate: (key < 10) (type: boolean) Select Operator @@ -3340,43 +3042,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 - - Stage: Stage-5 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 - - Stage: Stage-9 - Stats-Aggr Operator - - Stage: Stage-1 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 - - Stage: Stage-10 - Stats-Aggr 
Operator - - Stage: Stage-7 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Select Operator @@ -3389,18 +3054,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - - Stage: Stage-8 - Spark - Edges: - Reducer 5 <- Map 3 (GROUP SORT, 1) - Reducer 6 <- Reducer 5 (GROUP SORT, 1) - Reducer 7 <- Reducer 5 (GROUP SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan Filter Operator predicate: ((key < 10) or ((key > 10) and (key < 20))) (type: boolean) Select Operator @@ -3410,7 +3063,7 @@ STAGE PLANS: key expressions: key (type: string), value (type: string) sort order: ++ Map-reduce partition columns: key (type: string), value (type: string) - Reducer 5 + Reducer 2 Reduce Operator Tree: Forward Filter Operator @@ -3441,7 +3094,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) value expressions: _col0 (type: string) - Reducer 6 + Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) @@ -3452,7 +3105,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 + Reducer 4 Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) @@ -3464,6 +3117,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-5 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 + + Stage: Stage-6 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 + + Stage: Stage-7 + Stats-Aggr Operator + Stage: Stage-2 Move Operator files: @@ -3551,40 +3233,27 @@ insert overwrite table src_multi2 select * where key > 10 and key < 20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-4 is a root stage - Stage-6 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-6, Stage-7, Stage-8 + Stage-5 depends on stages: Stage-4 Stage-0 depends on stages: Stage-5 - Stage-9 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-0 Stage-1 depends on stages: Stage-5 - Stage-10 depends on stages: Stage-1 - Stage-7 depends on stages: Stage-4 - Stage-8 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-8 - Stage-3 depends on stages: Stage-8 + Stage-7 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-4 + Stage-3 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-4 Spark -#### A masked pattern was here #### - Vertices: - Map 4 - Map Operator Tree: - TableScan - alias: src - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-6 - Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 4 <- Reducer 2 (GROUP SORT, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: src Filter Operator predicate: (key < 10) (type: boolean) Select Operator @@ -3597,43 +3266,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 - - Stage: Stage-5 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 - - Stage: Stage-9 - Stats-Aggr Operator - - Stage: Stage-1 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 - - Stage: Stage-10 - Stats-Aggr Operator - - Stage: Stage-7 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Select Operator @@ -3646,18 +3278,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - - Stage: Stage-8 - Spark - Edges: - Reducer 5 <- Map 3 (GROUP SORT, 1) - Reducer 6 <- Reducer 5 (GROUP SORT, 1) - Reducer 7 <- Reducer 5 (GROUP SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan Filter Operator predicate: ((key < 10) or ((key > 10) and (key < 20))) (type: boolean) Select Operator @@ -3667,7 +3287,7 @@ STAGE PLANS: key expressions: key (type: string), value (type: string) sort order: ++ Map-reduce partition columns: key (type: string), value (type: string) - Reducer 5 + Reducer 2 Reduce Operator Tree: Forward Filter Operator @@ -3698,7 +3318,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) value expressions: _col0 (type: string) - Reducer 6 + Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) @@ -3709,7 +3329,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 + Reducer 4 Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) @@ -3721,6 +3341,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-5 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 + + Stage: Stage-6 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + 
tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 + + Stage: Stage-7 + Stats-Aggr Operator + Stage: Stage-2 Move Operator files: @@ -3808,40 +3457,27 @@ insert overwrite table src_multi2 select * where key > 10 and key < 20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-4 is a root stage - Stage-6 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-6, Stage-7, Stage-8 + Stage-5 depends on stages: Stage-4 Stage-0 depends on stages: Stage-5 - Stage-9 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-0 Stage-1 depends on stages: Stage-5 - Stage-10 depends on stages: Stage-1 - Stage-7 depends on stages: Stage-4 - Stage-8 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-8 - Stage-3 depends on stages: Stage-8 + Stage-7 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-4 + Stage-3 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-4 Spark -#### A masked pattern was here #### - Vertices: - Map 4 - Map Operator Tree: - TableScan - alias: src - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-6 - Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 4 <- Reducer 2 (GROUP SORT, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: src Filter Operator predicate: (key < 10) (type: boolean) Select Operator @@ -3854,43 +3490,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 - - Stage: Stage-5 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 - - Stage: Stage-9 - Stats-Aggr Operator - - Stage: Stage-1 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 - - Stage: Stage-10 - Stats-Aggr Operator - - Stage: Stage-7 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Select Operator @@ -3903,18 +3502,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - - Stage: Stage-8 - Spark - Edges: - Reducer 5 <- Map 3 (GROUP SORT, 1) - Reducer 6 <- Reducer 5 (GROUP SORT, 1) - Reducer 7 <- Reducer 5 (GROUP SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan Filter Operator predicate: ((key < 10) or ((key > 10) and (key < 20))) (type: boolean) Select Operator @@ -3924,7 +3511,7 @@ STAGE PLANS: key expressions: key (type: string), value (type: string) sort order: ++ Map-reduce partition 
columns: key (type: string), value (type: string) - Reducer 5 + Reducer 2 Reduce Operator Tree: Forward Filter Operator @@ -3955,7 +3542,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) value expressions: _col0 (type: string) - Reducer 6 + Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) @@ -3966,7 +3553,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 + Reducer 4 Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) @@ -3978,6 +3565,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-5 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 + + Stage: Stage-6 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 + + Stage: Stage-7 + Stats-Aggr Operator + Stage: Stage-2 Move Operator files: diff --git a/ql/src/test/results/clientpositive/spark/multigroupby_singlemr.q.out b/ql/src/test/results/clientpositive/spark/multigroupby_singlemr.q.out index 44991e3..15df6c6 100644 --- a/ql/src/test/results/clientpositive/spark/multigroupby_singlemr.q.out +++ b/ql/src/test/results/clientpositive/spark/multigroupby_singlemr.q.out @@ -50,39 +50,24 @@ INSERT OVERWRITE TABLE DEST2 SELECT TBL.C1, TBL.C2, COUNT(TBL.C3) GROUP BY TBL.C POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-4 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-4, Stage-5 + Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 - Stage-6 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0 Stage-1 depends on stages: Stage-3 - Stage-7 depends on stages: Stage-1 - Stage-5 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: tbl - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-4 - Spark Edges: - Reducer 4 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: tbl Select Operator expressions: c1 (type: int), c2 (type: int) outputColumnNames: c1, c2 @@ -96,7 +81,20 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) value expressions: _col1 (type: bigint) - Reducer 4 + Select Operator + expressions: c1 (type: int), c2 (type: int), c3 (type: int) + outputColumnNames: c1, c2, c3 + Group By Operator + aggregations: count(c3) + 
keys: c1 (type: int), c2 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + value expressions: _col2 (type: bigint) + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -113,6 +111,23 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), UDFToInteger(_col2) (type: int) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 Stage: Stage-3 Dependency Collection @@ -127,7 +142,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 - Stage: Stage-6 + Stage: Stage-4 Stats-Aggr Operator Stage: Stage-1 @@ -140,48 +155,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-7 - Stats-Aggr Operator - Stage: Stage-5 - Spark - Edges: - Reducer 5 <- Map 2 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - Select Operator - expressions: c1 (type: int), c2 (type: int), c3 (type: int) - outputColumnNames: c1, c2, c3 - Group By Operator - aggregations: count(c3) - keys: c1 (type: int), c2 (type: int) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - value expressions: _col2 (type: bigint) - Reducer 5 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: int), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Select Operator - expressions: _col0 (type: int), _col1 (type: int), UDFToInteger(_col2) (type: int) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 + Stats-Aggr Operator PREHOOK: query: EXPLAIN FROM TBL @@ -195,39 +170,24 @@ INSERT OVERWRITE TABLE DEST2 SELECT TBL.C1, TBL.C2, COUNT(TBL.C3) GROUP BY TBL.C POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-4 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-4, Stage-5 + Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 - Stage-6 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0 Stage-1 depends on stages: Stage-3 - Stage-7 depends on stages: Stage-1 - Stage-5 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: tbl - File Output Operator - 
compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-4 - Spark Edges: - Reducer 4 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: tbl Select Operator expressions: c1 (type: int), c2 (type: int) outputColumnNames: c1, c2 @@ -241,7 +201,20 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) value expressions: _col1 (type: bigint) - Reducer 4 + Select Operator + expressions: c2 (type: int), c1 (type: int), c3 (type: int) + outputColumnNames: c2, c1, c3 + Group By Operator + aggregations: count(c3) + keys: c2 (type: int), c1 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + value expressions: _col2 (type: bigint) + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -258,6 +231,23 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col1 (type: int), _col0 (type: int), UDFToInteger(_col2) (type: int) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 Stage: Stage-3 Dependency Collection @@ -272,7 +262,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 - Stage: Stage-6 + Stage: Stage-4 Stats-Aggr Operator Stage: Stage-1 @@ -285,48 +275,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-7 - Stats-Aggr Operator - Stage: Stage-5 - Spark - Edges: - Reducer 5 <- Map 2 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - Select Operator - expressions: c2 (type: int), c1 (type: int), c3 (type: int) - outputColumnNames: c2, c1, c3 - Group By Operator - aggregations: count(c3) - keys: c2 (type: int), c1 (type: int) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - value expressions: _col2 (type: bigint) - Reducer 5 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: int), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Select Operator - expressions: _col1 (type: int), _col0 (type: int), UDFToInteger(_col2) (type: int) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 + Stats-Aggr Operator PREHOOK: query: EXPLAIN FROM TBL @@ -340,39 +290,24 @@ INSERT OVERWRITE TABLE DEST2 SELECT TBL.C1, TBL.C2, COUNT(TBL.C3) GROUP BY TBL.C POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-4 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-4, Stage-5 + Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 - Stage-6 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0 Stage-1 depends on stages: Stage-3 - Stage-7 depends on stages: Stage-1 - Stage-5 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: tbl - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-4 - Spark Edges: - Reducer 4 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: tbl Select Operator expressions: c1 (type: int), c2 (type: int), c3 (type: int), c4 (type: int) outputColumnNames: c1, c2, c3, c4 @@ -386,7 +321,20 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) value expressions: _col3 (type: bigint) - Reducer 4 + Select Operator + expressions: c1 (type: int), c2 (type: int), c3 (type: int) + outputColumnNames: c1, c2, c3 + Group By Operator + aggregations: count(c3) + keys: c1 (type: int), c2 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + value expressions: _col2 (type: bigint) + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -403,6 +351,23 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest3 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), UDFToInteger(_col2) (type: int) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 Stage: Stage-3 Dependency Collection @@ -417,7 +382,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest3 - Stage: Stage-6 + Stage: Stage-4 Stats-Aggr Operator Stage: Stage-1 @@ -430,48 +395,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-7 - Stats-Aggr Operator - Stage: Stage-5 - Spark - Edges: - Reducer 5 <- Map 2 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - Select 
Operator - expressions: c1 (type: int), c2 (type: int), c3 (type: int) - outputColumnNames: c1, c2, c3 - Group By Operator - aggregations: count(c3) - keys: c1 (type: int), c2 (type: int) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - value expressions: _col2 (type: bigint) - Reducer 5 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: int), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Select Operator - expressions: _col0 (type: int), _col1 (type: int), UDFToInteger(_col2) (type: int) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 + Stats-Aggr Operator PREHOOK: query: EXPLAIN FROM TBL @@ -587,42 +512,27 @@ INSERT OVERWRITE TABLE DEST1 SELECT TBL.C1, COUNT(TBL.C2) GROUP BY TBL.C1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-3 is a root stage - Stage-5 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-5, Stage-6, Stage-7 + Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-4 - Stage-8 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-0 Stage-1 depends on stages: Stage-4 - Stage-9 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1 Stage-2 depends on stages: Stage-4 - Stage-10 depends on stages: Stage-2 - Stage-6 depends on stages: Stage-3 - Stage-7 depends on stages: Stage-3 + Stage-7 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 Spark -#### A masked pattern was here #### - Vertices: - Map 4 - Map Operator Tree: - TableScan - alias: tbl - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-5 - Spark Edges: - Reducer 5 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Map 1 (GROUP, 1) + Reducer 4 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: tbl Select Operator expressions: c1 (type: int), c2 (type: int), c3 (type: int), c4 (type: int) outputColumnNames: c1, c2, c3, c4 @@ -636,7 +546,33 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) value expressions: _col3 (type: bigint) - Reducer 5 + Select Operator + expressions: c1 (type: int), c2 (type: int), c3 (type: int) + outputColumnNames: c1, c2, c3 + Group By Operator + aggregations: count(c3) + keys: c1 (type: int), c2 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + value expressions: _col2 (type: bigint) + Select Operator + expressions: c1 (type: int), c2 (type: int) + outputColumnNames: c1, c2 + Group By Operator + aggregations: count(c2) + keys: c1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition 
columns: _col0 (type: int) + value expressions: _col1 (type: bigint) + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -653,6 +589,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest3 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), UDFToInteger(_col2) (type: int) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: int), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 Stage: Stage-4 Dependency Collection @@ -667,7 +637,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest3 - Stage: Stage-8 + Stage: Stage-5 Stats-Aggr Operator Stage: Stage-1 @@ -680,7 +650,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-9 + Stage: Stage-6 Stats-Aggr Operator Stage: Stage-2 @@ -693,86 +663,6 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 - Stage: Stage-10 - Stats-Aggr Operator - - Stage: Stage-6 - Spark - Edges: - Reducer 6 <- Map 2 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - Select Operator - expressions: c1 (type: int), c2 (type: int), c3 (type: int) - outputColumnNames: c1, c2, c3 - Group By Operator - aggregations: count(c3) - keys: c1 (type: int), c2 (type: int) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - value expressions: _col2 (type: bigint) - Reducer 6 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: int), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Select Operator - expressions: _col0 (type: int), _col1 (type: int), UDFToInteger(_col2) (type: int) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - Stage: Stage-7 - Spark - Edges: - Reducer 7 <- Map 3 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - Select Operator - expressions: c1 (type: int), c2 (type: int) - outputColumnNames: c1, c2 - 
Group By Operator - aggregations: count(c2) - keys: c1 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: bigint) - Reducer 7 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: int), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 + Stats-Aggr Operator diff --git a/ql/src/test/results/clientpositive/spark/ppd_multi_insert.q.out b/ql/src/test/results/clientpositive/spark/ppd_multi_insert.q.out index 96f2c06..a5cb88f 100644 --- a/ql/src/test/results/clientpositive/spark/ppd_multi_insert.q.out +++ b/ql/src/test/results/clientpositive/spark/ppd_multi_insert.q.out @@ -38,27 +38,23 @@ INSERT OVERWRITE DIRECTORY 'target/warehouse/mi4.out' SELECT a.value WHERE a.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-4 is a root stage - Stage-6 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-6, Stage-7, Stage-8, Stage-9 + Stage-5 depends on stages: Stage-4 Stage-0 depends on stages: Stage-5 - Stage-10 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-0 Stage-1 depends on stages: Stage-5 - Stage-11 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-1 Stage-2 depends on stages: Stage-5 - Stage-12 depends on stages: Stage-2 - Stage-7 depends on stages: Stage-4 - Stage-8 depends on stages: Stage-4 - Stage-9 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-9 + Stage-8 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-4 Spark Edges: - Reducer 6 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1), Map 7 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 5 + Map 1 Map Operator Tree: TableScan alias: b @@ -68,7 +64,7 @@ STAGE PLANS: key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Map 7 + Map 3 Map Operator Tree: TableScan alias: a @@ -79,7 +75,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) value expressions: value (type: string) - Reducer 6 + Reducer 2 Reduce Operator Tree: Join Operator condition map: @@ -88,32 +84,53 @@ STAGE PLANS: 0 {KEY.reducesinkkey0} {VALUE._col0} 1 outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-6 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - Filter Operator - predicate: (_col0 < 100) (type: boolean) - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.mi1 + Filter Operator + predicate: (_col0 < 100) (type: boolean) + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.mi1 + Filter Operator + predicate: ((_col0 >= 100) and (_col0 < 200)) (type: boolean) + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.mi2 + Filter Operator + predicate: ((_col0 >= 200) and (_col0 < 300)) (type: boolean) + Select Operator + expressions: UDFToInteger(_col0) (type: int) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.mi3 + Filter Operator + predicate: (_col0 >= 300) (type: boolean) + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-5 Dependency Collection @@ -128,7 +145,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi1 - Stage: Stage-10 + Stage: Stage-6 Stats-Aggr Operator Stage: Stage-1 @@ -141,7 +158,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi2 - Stage: Stage-11 + Stage: Stage-7 Stats-Aggr Operator Stage: Stage-2 @@ -157,67 +174,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi3 - Stage: Stage-12 - Stats-Aggr Operator - - Stage: Stage-7 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - Filter Operator - predicate: ((_col0 >= 100) and (_col0 < 200)) (type: boolean) - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.mi2 - Stage: Stage-8 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - Filter Operator - predicate: ((_col0 >= 200) and (_col0 < 300)) (type: boolean) - Select Operator - expressions: UDFToInteger(_col0) (type: int) - outputColumnNames: _col0 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: 
default.mi3 - - Stage: Stage-9 - Spark -#### A masked pattern was here #### - Vertices: - Map 4 - Map Operator Tree: - TableScan - Filter Operator - predicate: (_col0 >= 300) (type: boolean) - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stats-Aggr Operator Stage: Stage-3 Move Operator @@ -1325,27 +1283,23 @@ INSERT OVERWRITE DIRECTORY 'target/warehouse/mi4.out' SELECT a.value WHERE a.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-4 is a root stage - Stage-6 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-6, Stage-7, Stage-8, Stage-9 + Stage-5 depends on stages: Stage-4 Stage-0 depends on stages: Stage-5 - Stage-10 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-0 Stage-1 depends on stages: Stage-5 - Stage-11 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-1 Stage-2 depends on stages: Stage-5 - Stage-12 depends on stages: Stage-2 - Stage-7 depends on stages: Stage-4 - Stage-8 depends on stages: Stage-4 - Stage-9 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-9 + Stage-8 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-4 Spark Edges: - Reducer 6 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1), Map 7 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 5 + Map 1 Map Operator Tree: TableScan alias: b @@ -1355,7 +1309,7 @@ STAGE PLANS: key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Map 7 + Map 3 Map Operator Tree: TableScan alias: a @@ -1366,7 +1320,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) value expressions: value (type: string) - Reducer 6 + Reducer 2 Reduce Operator Tree: Join Operator condition map: @@ -1375,32 +1329,53 @@ STAGE PLANS: 0 {KEY.reducesinkkey0} {VALUE._col0} 1 outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-6 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - Filter Operator - predicate: (_col0 < 100) (type: boolean) - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.mi1 + Filter Operator + predicate: (_col0 < 100) (type: boolean) + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.mi1 + Filter Operator + predicate: ((_col0 >= 100) and (_col0 < 
200)) (type: boolean) + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.mi2 + Filter Operator + predicate: ((_col0 >= 200) and (_col0 < 300)) (type: boolean) + Select Operator + expressions: UDFToInteger(_col0) (type: int) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.mi3 + Filter Operator + predicate: (_col0 >= 300) (type: boolean) + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-5 Dependency Collection @@ -1415,7 +1390,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi1 - Stage: Stage-10 + Stage: Stage-6 Stats-Aggr Operator Stage: Stage-1 @@ -1428,7 +1403,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi2 - Stage: Stage-11 + Stage: Stage-7 Stats-Aggr Operator Stage: Stage-2 @@ -1444,67 +1419,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi3 - Stage: Stage-12 - Stats-Aggr Operator - - Stage: Stage-7 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - Filter Operator - predicate: ((_col0 >= 100) and (_col0 < 200)) (type: boolean) - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.mi2 - Stage: Stage-8 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - Filter Operator - predicate: ((_col0 >= 200) and (_col0 < 300)) (type: boolean) - Select Operator - expressions: UDFToInteger(_col0) (type: int) - outputColumnNames: _col0 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.mi3 - - Stage: Stage-9 - Spark -#### A masked pattern was here #### - Vertices: - Map 4 - Map Operator Tree: - TableScan - Filter Operator - predicate: (_col0 >= 300) (type: boolean) - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stats-Aggr Operator Stage: Stage-3 Move Operator diff --git a/ql/src/test/results/clientpositive/spark/ppd_transform.q.out 
b/ql/src/test/results/clientpositive/spark/ppd_transform.q.out index 7ec5d8d..815f5bb 100644 --- a/ql/src/test/results/clientpositive/spark/ppd_transform.q.out +++ b/ql/src/test/results/clientpositive/spark/ppd_transform.q.out @@ -361,18 +361,16 @@ FROM ( POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-4 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-4, Stage-5 - Stage-0 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-5 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: src @@ -385,31 +383,28 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-4 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - Filter Operator - predicate: ((_col0 = 'a') or (_col0 = 'b')) (type: boolean) - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Filter Operator + predicate: ((_col0 = 'a') or (_col0 = 'b')) (type: boolean) + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Filter Operator + predicate: ((_col0 = 'c') or (_col0 = 'd')) (type: boolean) + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -420,25 +415,6 @@ STAGE PLANS: hdfs directory: false #### A masked pattern was here #### - Stage: Stage-5 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - Filter Operator - predicate: ((_col0 = 'c') or (_col0 = 'd')) (type: boolean) - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-1 Move Operator files: diff --git a/ql/src/test/results/clientpositive/spark/spark_multi_insert_split_work.q.out b/ql/src/test/results/clientpositive/spark/spark_multi_insert_split_work.q.out new 
file mode 100644 index 0000000..345fac6 --- /dev/null +++ b/ql/src/test/results/clientpositive/spark/spark_multi_insert_split_work.q.out @@ -0,0 +1,880 @@ +PREHOOK: query: create table src_multi1 like src +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src_multi1 +POSTHOOK: query: create table src_multi1 like src +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_multi1 +PREHOOK: query: create table src_multi2 like src +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src_multi2 +POSTHOOK: query: create table src_multi2 like src +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_multi2 +PREHOOK: query: create table src_multi3 like src +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src_multi3 +POSTHOOK: query: create table src_multi3 like src +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_multi3 +PREHOOK: query: -- This is used to test that splitting SparkWork works correctly in multi-insertion. +-- In particular, if a MapWork/ReduceWork to be split has FS, then there shouldn't be +-- duplicated copies of FS. + +explain +from src +insert overwrite table src_multi1 select key, count(1) group by key order by key +insert overwrite table src_multi2 select value, count(1) group by value order by value +insert overwrite table src_multi3 select * where key < 10 +PREHOOK: type: QUERY +POSTHOOK: query: -- This is used to test that splitting SparkWork works correctly in multi-insertion. +-- In particular, if a MapWork/ReduceWork to be split has FS, then there shouldn't be +-- duplicated copies of FS. + +explain +from src +insert overwrite table src_multi1 select key, count(1) group by key order by key +insert overwrite table src_multi2 select value, count(1) group by value order by value +insert overwrite table src_multi3 select * where key < 10 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-3 is a root stage + Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-4 + Stage-5 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-4 + Stage-6 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-4 + Stage-7 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-3 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 4 <- Map 1 (GROUP, 1) + Reducer 5 <- Reducer 4 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Select Operator + expressions: key (type: string) + outputColumnNames: key + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: bigint) + Select Operator + expressions: value (type: string) + outputColumnNames: value + Group By Operator + aggregations: count(1) + keys: value (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: bigint) + Filter Operator + predicate: (key < 10) (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames:
_col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi3 + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + value expressions: _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + value expressions: _col1 (type: bigint) + Reducer 5 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 + + Stage: Stage-4 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 + + Stage: Stage-5 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 + + Stage: Stage-6 + Stats-Aggr Operator + + Stage: Stage-2 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi3 + + Stage: Stage-7 + Stats-Aggr Operator + +PREHOOK: query: from src +insert overwrite table src_multi1 select key, count(1) group by key order by key +insert overwrite table src_multi2 select value, count(1) group by value order by value +insert overwrite table src_multi3 select * where key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@src_multi1 +PREHOOK: Output: default@src_multi2 +PREHOOK: Output: default@src_multi3 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: 
Stats aggregator of type counter cannot be connected to +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: from src +insert overwrite table src_multi1 select key, count(1) group by key order by key +insert overwrite table src_multi2 select value, count(1) group by value order by value +insert overwrite table src_multi3 select * where key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@src_multi1 +POSTHOOK: Output: default@src_multi2 +POSTHOOK: Output: default@src_multi3 +POSTHOOK: Lineage: src_multi1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_multi1.value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: src_multi2.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: src_multi3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_multi3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select * from src_multi1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src_multi1 +#### A masked pattern was here #### +POSTHOOK: query: select * from src_multi1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_multi1 +#### A masked pattern was here #### +0 3 +10 1 +100 2 +103 2 +104 2 +105 1 +11 1 +111 1 +113 2 +114 1 +116 1 +118 2 +119 3 +12 2 +120 2 +125 2 +126 1 +128 3 +129 2 +131 1 +133 1 +134 2 +136 1 +137 2 +138 4 +143 1 +145 1 +146 2 +149 2 +15 2 +150 1 +152 2 +153 1 +155 1 +156 1 +157 1 +158 1 +160 1 +162 1 +163 1 +164 2 +165 2 +166 1 +167 3 +168 1 +169 4 +17 1 +170 1 +172 2 +174 2 +175 2 +176 2 +177 1 +178 1 +179 2 +18 2 +180 1 +181 1 +183 1 +186 1 +187 3 +189 1 +19 1 +190 1 +191 2 +192 1 +193 3 +194 1 +195 2 +196 1 +197 2 +199 3 +2 1 +20 1 +200 2 +201 1 +202 1 +203 2 +205 2 +207 2 +208 3 +209 2 +213 2 +214 1 +216 2 +217 2 +218 1 +219 2 +221 2 +222 1 +223 2 +224 2 +226 1 +228 1 +229 2 +230 5 +233 2 +235 1 +237 2 +238 2 +239 2 +24 2 +241 1 +242 2 +244 1 +247 1 +248 1 +249 1 +252 1 +255 2 +256 2 +257 1 +258 1 +26 2 +260 1 +262 1 +263 1 +265 2 +266 1 +27 1 +272 2 +273 3 +274 1 +275 1 +277 4 +278 2 +28 1 +280 2 +281 2 +282 2 +283 1 +284 1 +285 1 +286 1 +287 1 +288 2 +289 1 +291 1 +292 1 +296 1 +298 3 +30 1 +302 1 +305 1 +306 1 +307 2 +308 1 +309 2 +310 1 +311 3 +315 1 +316 3 +317 2 +318 3 +321 2 +322 2 +323 1 +325 2 +327 3 +33 1 +331 2 +332 1 +333 2 +335 1 +336 1 +338 1 +339 1 +34 1 +341 1 +342 2 +344 2 +345 1 +348 5 +35 3 +351 1 +353 2 +356 1 +360 1 +362 1 +364 1 +365 1 +366 1 +367 2 +368 1 +369 3 +37 2 +373 1 +374 1 +375 1 +377 1 +378 1 +379 1 +382 2 +384 3 +386 1 +389 1 +392 1 +393 1 +394 1 +395 2 +396 3 +397 2 +399 2 +4 1 +400 1 +401 5 +402 1 +403 3 +404 2 +406 4 +407 1 +409 3 +41 1 +411 1 +413 2 +414 2 +417 3 +418 1 +419 1 +42 2 +421 1 +424 2 +427 1 +429 2 +43 1 +430 3 +431 3 +432 1 +435 1 +436 1 +437 1 +438 3 +439 2 +44 1 +443 1 +444 1 +446 1 +448 1 +449 1 +452 1 +453 1 +454 3 +455 1 +457 1 +458 2 +459 2 +460 1 +462 2 +463 2 +466 3 +467 1 +468 4 +469 5 +47 1 +470 1 +472 1 +475 1 +477 1 +478 2 +479 1 +480 3 +481 1 +482 1 +483 1 +484 1 +485 1 +487 1 +489 4 +490 1 +491 1 +492 2 +493 1 +494 1 
+495 1 +496 1 +497 1 +498 3 +5 3 +51 2 +53 1 +54 1 +57 1 +58 2 +64 1 +65 1 +66 1 +67 2 +69 1 +70 3 +72 2 +74 1 +76 2 +77 1 +78 1 +8 1 +80 1 +82 1 +83 2 +84 2 +85 1 +86 1 +87 1 +9 1 +90 3 +92 1 +95 2 +96 1 +97 2 +98 2 +PREHOOK: query: select * from src_multi2 +PREHOOK: type: QUERY +PREHOOK: Input: default@src_multi2 +#### A masked pattern was here #### +POSTHOOK: query: select * from src_multi2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_multi2 +#### A masked pattern was here #### +val_0 3 +val_10 1 +val_100 2 +val_103 2 +val_104 2 +val_105 1 +val_11 1 +val_111 1 +val_113 2 +val_114 1 +val_116 1 +val_118 2 +val_119 3 +val_12 2 +val_120 2 +val_125 2 +val_126 1 +val_128 3 +val_129 2 +val_131 1 +val_133 1 +val_134 2 +val_136 1 +val_137 2 +val_138 4 +val_143 1 +val_145 1 +val_146 2 +val_149 2 +val_15 2 +val_150 1 +val_152 2 +val_153 1 +val_155 1 +val_156 1 +val_157 1 +val_158 1 +val_160 1 +val_162 1 +val_163 1 +val_164 2 +val_165 2 +val_166 1 +val_167 3 +val_168 1 +val_169 4 +val_17 1 +val_170 1 +val_172 2 +val_174 2 +val_175 2 +val_176 2 +val_177 1 +val_178 1 +val_179 2 +val_18 2 +val_180 1 +val_181 1 +val_183 1 +val_186 1 +val_187 3 +val_189 1 +val_19 1 +val_190 1 +val_191 2 +val_192 1 +val_193 3 +val_194 1 +val_195 2 +val_196 1 +val_197 2 +val_199 3 +val_2 1 +val_20 1 +val_200 2 +val_201 1 +val_202 1 +val_203 2 +val_205 2 +val_207 2 +val_208 3 +val_209 2 +val_213 2 +val_214 1 +val_216 2 +val_217 2 +val_218 1 +val_219 2 +val_221 2 +val_222 1 +val_223 2 +val_224 2 +val_226 1 +val_228 1 +val_229 2 +val_230 5 +val_233 2 +val_235 1 +val_237 2 +val_238 2 +val_239 2 +val_24 2 +val_241 1 +val_242 2 +val_244 1 +val_247 1 +val_248 1 +val_249 1 +val_252 1 +val_255 2 +val_256 2 +val_257 1 +val_258 1 +val_26 2 +val_260 1 +val_262 1 +val_263 1 +val_265 2 +val_266 1 +val_27 1 +val_272 2 +val_273 3 +val_274 1 +val_275 1 +val_277 4 +val_278 2 +val_28 1 +val_280 2 +val_281 2 +val_282 2 +val_283 1 +val_284 1 +val_285 1 +val_286 1 +val_287 1 +val_288 2 +val_289 1 +val_291 1 +val_292 1 +val_296 1 +val_298 3 +val_30 1 +val_302 1 +val_305 1 +val_306 1 +val_307 2 +val_308 1 +val_309 2 +val_310 1 +val_311 3 +val_315 1 +val_316 3 +val_317 2 +val_318 3 +val_321 2 +val_322 2 +val_323 1 +val_325 2 +val_327 3 +val_33 1 +val_331 2 +val_332 1 +val_333 2 +val_335 1 +val_336 1 +val_338 1 +val_339 1 +val_34 1 +val_341 1 +val_342 2 +val_344 2 +val_345 1 +val_348 5 +val_35 3 +val_351 1 +val_353 2 +val_356 1 +val_360 1 +val_362 1 +val_364 1 +val_365 1 +val_366 1 +val_367 2 +val_368 1 +val_369 3 +val_37 2 +val_373 1 +val_374 1 +val_375 1 +val_377 1 +val_378 1 +val_379 1 +val_382 2 +val_384 3 +val_386 1 +val_389 1 +val_392 1 +val_393 1 +val_394 1 +val_395 2 +val_396 3 +val_397 2 +val_399 2 +val_4 1 +val_400 1 +val_401 5 +val_402 1 +val_403 3 +val_404 2 +val_406 4 +val_407 1 +val_409 3 +val_41 1 +val_411 1 +val_413 2 +val_414 2 +val_417 3 +val_418 1 +val_419 1 +val_42 2 +val_421 1 +val_424 2 +val_427 1 +val_429 2 +val_43 1 +val_430 3 +val_431 3 +val_432 1 +val_435 1 +val_436 1 +val_437 1 +val_438 3 +val_439 2 +val_44 1 +val_443 1 +val_444 1 +val_446 1 +val_448 1 +val_449 1 +val_452 1 +val_453 1 +val_454 3 +val_455 1 +val_457 1 +val_458 2 +val_459 2 +val_460 1 +val_462 2 +val_463 2 +val_466 3 +val_467 1 +val_468 4 +val_469 5 +val_47 1 +val_470 1 +val_472 1 +val_475 1 +val_477 1 +val_478 2 +val_479 1 +val_480 3 +val_481 1 +val_482 1 +val_483 1 +val_484 1 +val_485 1 +val_487 1 +val_489 4 +val_490 1 +val_491 1 +val_492 2 +val_493 1 +val_494 1 +val_495 1 +val_496 1 +val_497 1 +val_498 3 +val_5 3 +val_51 2 +val_53 1 +val_54 1 
+val_57 1 +val_58 2 +val_64 1 +val_65 1 +val_66 1 +val_67 2 +val_69 1 +val_70 3 +val_72 2 +val_74 1 +val_76 2 +val_77 1 +val_78 1 +val_8 1 +val_80 1 +val_82 1 +val_83 2 +val_84 2 +val_85 1 +val_86 1 +val_87 1 +val_9 1 +val_90 3 +val_92 1 +val_95 2 +val_96 1 +val_97 2 +val_98 2 +PREHOOK: query: select * from src_multi3 +PREHOOK: type: QUERY +PREHOOK: Input: default@src_multi3 +#### A masked pattern was here #### +POSTHOOK: query: select * from src_multi3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_multi3 +#### A masked pattern was here #### +0 val_0 +4 val_4 +8 val_8 +0 val_0 +0 val_0 +5 val_5 +5 val_5 +2 val_2 +5 val_5 +9 val_9 diff --git a/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out b/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out index 2b4a331..bd50727 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out +++ b/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out @@ -58,48 +58,54 @@ INSERT OVERWRITE TABLE src_5 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-4 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-5, Stage-4 - Stage-0 depends on stages: Stage-3 - Stage-6 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 Stage-1 depends on stages: Stage-3 - Stage-7 depends on stages: Stage-1 - Stage-5 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-2 Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: b - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-4 - Spark Edges: - Reducer 10 <- Map 9 (GROUP, 1) - Reducer 11 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 10 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 7 <- Map 6 (GROUP PARTITION-LEVEL SORT, 1), Reducer 11 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 8 <- Reducer 7 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 9 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 7 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP SORT, 1) + Reducer 5 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 6 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 9 <- Map 8 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: b Reduce Output Operator sort order: value expressions: key (type: string), value (type: string) + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) Map 6 Map Operator Tree: TableScan + alias: a + Filter Operator + predicate: (((key > '9') and key is not null) and value is not null) (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Map 7 + Map Operator Tree: + TableScan alias: s1 Filter Operator predicate: 
(key > '2') (type: boolean) @@ -110,7 +116,7 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Map 9 + Map 8 Map Operator Tree: TableScan alias: s1 @@ -124,24 +130,7 @@ STAGE PLANS: Reduce Output Operator sort order: value expressions: _col0 (type: bigint) - Reducer 10 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Filter Operator - predicate: (_col0 = 0) (type: boolean) - Select Operator - expressions: 0 (type: bigint) - outputColumnNames: _col0 - Group By Operator - keys: _col0 (type: bigint) - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - Reducer 11 + Reducer 2 Reduce Operator Tree: Join Operator condition map: @@ -155,7 +144,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) value expressions: _col1 (type: string) - Reducer 7 + Reducer 3 Reduce Operator Tree: Join Operator condition map: @@ -173,7 +162,7 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + value expressions: _col1 (type: string) - Reducer 8 + Reducer 4 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) @@ -185,11 +174,47 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_5 + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + 1 + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_4 + Reducer 9 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Filter Operator + predicate: (_col0 = 0) (type: boolean) + Select Operator + expressions: 0 (type: bigint) + outputColumnNames: _col0 + Group By Operator + keys: _col0 (type: bigint) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: Stage: Stage-3 Dependency Collection - Stage: Stage-0 + Stage: Stage-1 Move Operator tables: replace: true @@ -197,12 +222,12 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_4 + name: default.src_5 - Stage: Stage-6 + Stage: Stage-4 Stats-Aggr Operator - Stage: Stage-1 + Stage: Stage-0 Move Operator tables: replace: true @@ -210,60 +235,10 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_5 - - Stage: Stage-7 - Stats-Aggr Operator + name: default.src_4 Stage: Stage-5 - Spark - Edges: - Reducer 5 <- Map 2 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: key (type: string), value (type: string) 
- sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Map 4 - Map Operator Tree: - TableScan - alias: a - Filter Operator - predicate: (((key > '9') and key is not null) and value is not null) (type: boolean) - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Group By Operator - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Reducer 5 - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - 1 - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_4 + Stats-Aggr Operator PREHOOK: query: from src b INSERT OVERWRITE TABLE src_4 @@ -304,13 +279,11 @@ POSTHOOK: Lineage: src_4.value EXPRESSION [(src)b.FieldSchema(name:value, type:s POSTHOOK: Lineage: src_5.key EXPRESSION [(src)b.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: src_5.value EXPRESSION [(src)b.FieldSchema(name:value, type:string, comment:default), ] RUN: Stage-2:MAPRED -RUN: Stage-4:MAPRED -RUN: Stage-5:MAPRED RUN: Stage-3:DEPENDENCY_COLLECTION -RUN: Stage-0:MOVE RUN: Stage-1:MOVE -RUN: Stage-6:STATS -RUN: Stage-7:STATS +RUN: Stage-0:MOVE +RUN: Stage-4:STATS +RUN: Stage-5:STATS PREHOOK: query: select * from src_4 PREHOOK: type: QUERY PREHOOK: Input: default@src_4 @@ -487,48 +460,54 @@ INSERT OVERWRITE TABLE src_5 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-4 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-5, Stage-4 - Stage-0 depends on stages: Stage-3 - Stage-6 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 Stage-1 depends on stages: Stage-3 - Stage-7 depends on stages: Stage-1 - Stage-5 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-2 Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: b - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-4 - Spark Edges: - Reducer 10 <- Map 9 (GROUP, 1) - Reducer 11 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 10 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 7 <- Map 6 (GROUP PARTITION-LEVEL SORT, 1), Reducer 11 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 8 <- Reducer 7 (GROUP SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 9 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 7 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP SORT, 1) + Reducer 5 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 6 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 9 <- Map 8 (GROUP, 1) #### A masked pattern was 
here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: b Reduce Output Operator sort order: value expressions: key (type: string), value (type: string) + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) Map 6 Map Operator Tree: TableScan + alias: a + Filter Operator + predicate: (((key > '9') and key is not null) and value is not null) (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Map 7 + Map Operator Tree: + TableScan alias: s1 Filter Operator predicate: (key > '2') (type: boolean) @@ -539,7 +518,7 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Map 9 + Map 8 Map Operator Tree: TableScan alias: s1 @@ -553,24 +532,7 @@ STAGE PLANS: Reduce Output Operator sort order: value expressions: _col0 (type: bigint) - Reducer 10 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Filter Operator - predicate: (_col0 = 0) (type: boolean) - Select Operator - expressions: 0 (type: bigint) - outputColumnNames: _col0 - Group By Operator - keys: _col0 (type: bigint) - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - Reducer 11 + Reducer 2 Reduce Operator Tree: Join Operator condition map: @@ -584,7 +546,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) value expressions: _col1 (type: string) - Reducer 7 + Reducer 3 Reduce Operator Tree: Join Operator condition map: @@ -602,7 +564,7 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + value expressions: _col1 (type: string) - Reducer 8 + Reducer 4 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) @@ -614,11 +576,47 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_5 + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + 1 + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_4 + Reducer 9 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Filter Operator + predicate: (_col0 = 0) (type: boolean) + Select Operator + expressions: 0 (type: bigint) + outputColumnNames: _col0 + Group By Operator + keys: _col0 (type: bigint) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: Stage: Stage-3 Dependency Collection - Stage: Stage-0 + Stage: Stage-1 Move Operator tables: replace: true @@ -626,12 +624,12 @@ 
STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              name: default.src_4
+              name: default.src_5

-  Stage: Stage-6
+  Stage: Stage-4
     Stats-Aggr Operator

-  Stage: Stage-1
+  Stage: Stage-0
     Move Operator
       tables:
           replace: true
@@ -639,60 +637,10 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              name: default.src_5
-
-  Stage: Stage-7
-    Stats-Aggr Operator
+              name: default.src_4

   Stage: Stage-5
-    Spark
-      Edges:
-        Reducer 5 <- Map 2 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
-#### A masked pattern was here ####
-      Vertices:
-        Map 2
-            Map Operator Tree:
-                TableScan
-                  Reduce Output Operator
-                    key expressions: key (type: string), value (type: string)
-                    sort order: ++
-                    Map-reduce partition columns: key (type: string), value (type: string)
-        Map 4
-            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Filter Operator
-                    predicate: (((key > '9') and key is not null) and value is not null) (type: boolean)
-                    Select Operator
-                      expressions: key (type: string), value (type: string)
-                      outputColumnNames: _col0, _col1
-                      Group By Operator
-                        keys: _col0 (type: string), _col1 (type: string)
-                        mode: hash
-                        outputColumnNames: _col0, _col1
-                        Reduce Output Operator
-                          key expressions: _col0 (type: string), _col1 (type: string)
-                          sort order: ++
-                          Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
-        Reducer 5
-            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Left Semi Join 0 to 1
-                condition expressions:
-                  0 {KEY.reducesinkkey0} {KEY.reducesinkkey1}
-                  1
-                outputColumnNames: _col0, _col1
-                Select Operator
-                  expressions: _col0 (type: string), _col1 (type: string)
-                  outputColumnNames: _col0, _col1
-                  File Output Operator
-                    compressed: false
-                    table:
-                        input format: org.apache.hadoop.mapred.TextInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                        name: default.src_4
+    Stats-Aggr Operator

 PREHOOK: query: from src b
 INSERT OVERWRITE TABLE src_4
@@ -733,13 +681,11 @@ POSTHOOK: Lineage: src_4.value EXPRESSION [(src)b.FieldSchema(name:value, type:s
 POSTHOOK: Lineage: src_5.key EXPRESSION [(src)b.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: src_5.value EXPRESSION [(src)b.FieldSchema(name:value, type:string, comment:default), ]
 RUN: Stage-2:MAPRED
-RUN: Stage-4:MAPRED
-RUN: Stage-5:MAPRED
 RUN: Stage-3:DEPENDENCY_COLLECTION
-RUN: Stage-0:MOVE
 RUN: Stage-1:MOVE
-RUN: Stage-6:STATS
-RUN: Stage-7:STATS
+RUN: Stage-0:MOVE
+RUN: Stage-4:STATS
+RUN: Stage-5:STATS
 PREHOOK: query: select * from src_4
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src_4
diff --git a/ql/src/test/results/clientpositive/spark/union18.q.out b/ql/src/test/results/clientpositive/spark/union18.q.out
index f94fa0b..5ea3d2f 100644
--- a/ql/src/test/results/clientpositive/spark/union18.q.out
+++ b/ql/src/test/results/clientpositive/spark/union18.q.out
@@ -34,23 +34,21 @@ INSERT OVERWRITE TABLE DEST2 SELECT unionsrc.key, unionsrc.value, unionsrc.value
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
-  Stage-4 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-4, Stage-5
+  Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
-  Stage-6 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-0
   Stage-1 depends on stages: Stage-3
-  Stage-7 depends on stages: Stage-1
-  Stage-5 depends on stages: Stage-2
+  Stage-5 depends on stages: Stage-1

 STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 4 <- Map 3 (GROUP, 1)
-        Union 5 <- Map 6 (NONE, 0), Reducer 4 (NONE, 0)
+        Reducer 2 <- Map 1 (GROUP, 1)
+        Union 3 <- Map 4 (NONE, 0), Reducer 2 (NONE, 0)
 #### A masked pattern was here ####
       Vertices:
-        Map 3
+        Map 1
             Map Operator Tree:
                 TableScan
                   alias: s1
@@ -62,20 +60,34 @@ STAGE PLANS:
                       Reduce Output Operator
                         sort order:
                         value expressions: _col0 (type: bigint)
-        Map 6
+        Map 4
             Map Operator Tree:
                 TableScan
                   alias: s2
                   Select Operator
                     expressions: key (type: string), value (type: string)
                     outputColumnNames: _col0, _col1
-                    File Output Operator
-                      compressed: false
-                      table:
-                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-        Reducer 4
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string)
+                      outputColumnNames: _col0, _col1
+                      File Output Operator
+                        compressed: false
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                            name: default.dest1
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string)
+                      outputColumnNames: _col0, _col1, _col2
+                      File Output Operator
+                        compressed: false
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                            name: default.dest2
+        Reducer 2
             Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
@@ -84,22 +96,6 @@ STAGE PLANS:
                 Select Operator
                   expressions: 'tst1' (type: string), UDFToString(_col0) (type: string)
                   outputColumnNames: _col0, _col1
-                  File Output Operator
-                    compressed: false
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-        Union 5
-            Vertex: Union 5
-
-  Stage: Stage-4
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 1
-            Map Operator Tree:
-                TableScan
                   Select Operator
                     expressions: _col0 (type: string), _col1 (type: string)
                     outputColumnNames: _col0, _col1
@@ -110,6 +106,18 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.dest1
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string)
+                    outputColumnNames: _col0, _col1, _col2
+                    File Output Operator
+                      compressed: false
+                      table:
+                          input format: org.apache.hadoop.mapred.TextInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                          name: default.dest2
+        Union 3
+            Vertex: Union 3

   Stage: Stage-3
     Dependency Collection
@@ -124,7 +132,7 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest1

-  Stage: Stage-6
+  Stage: Stage-4
     Stats-Aggr Operator

   Stage: Stage-1
@@ -137,26 +145,8 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest2

-  Stage: Stage-7
-    Stats-Aggr Operator
-
   Stage: Stage-5
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 2
-            Map Operator Tree:
-                TableScan
-                  Select Operator
-                    expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string)
-                    outputColumnNames: _col0, _col1, _col2
-                    File Output Operator
-                      compressed: false
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.dest2
+    Stats-Aggr Operator

 PREHOOK: query: FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1
                          UNION  ALL
diff --git a/ql/src/test/results/clientpositive/spark/union19.q.out b/ql/src/test/results/clientpositive/spark/union19.q.out
index 8dcb543..3a07e6d 100644
--- a/ql/src/test/results/clientpositive/spark/union19.q.out
+++ b/ql/src/test/results/clientpositive/spark/union19.q.out
@@ -34,23 +34,22 @@ INSERT OVERWRITE TABLE DEST2 SELECT unionsrc.key, unionsrc.value, unionsrc.value
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
-  Stage-4 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-4, Stage-5
+  Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
-  Stage-6 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-0
   Stage-1 depends on stages: Stage-3
-  Stage-7 depends on stages: Stage-1
-  Stage-5 depends on stages: Stage-2
+  Stage-5 depends on stages: Stage-1

 STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 4 <- Map 3 (GROUP, 1)
-        Union 5 <- Map 6 (NONE, 0), Reducer 4 (NONE, 0)
+        Reducer 2 <- Map 1 (GROUP, 1)
+        Reducer 4 <- Union 3 (GROUP, 1)
+        Union 3 <- Map 5 (NONE, 0), Reducer 2 (NONE, 0)
 #### A masked pattern was here ####
       Vertices:
-        Map 3
+        Map 1
             Map Operator Tree:
                 TableScan
                   alias: s1
@@ -62,20 +61,37 @@ STAGE PLANS:
                       Reduce Output Operator
                         sort order:
                         value expressions: _col0 (type: bigint)
-        Map 6
+        Map 5
             Map Operator Tree:
                 TableScan
                   alias: s2
                   Select Operator
                     expressions: key (type: string), value (type: string)
                     outputColumnNames: _col0, _col1
-                    File Output Operator
-                      compressed: false
-                      table:
-                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-        Reducer 4
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string)
+                      outputColumnNames: _col0, _col1
+                      Group By Operator
+                        aggregations: count(_col1)
+                        keys: _col0 (type: string)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Reduce Output Operator
+                          key expressions: _col0 (type: string)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: string)
+                          value expressions: _col1 (type: bigint)
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string)
+                      outputColumnNames: _col0, _col1, _col2
+                      File Output Operator
+                        compressed: false
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                            name: default.dest2
+        Reducer 2
             Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
@@ -84,24 +100,6 @@ STAGE PLANS:
                 Select Operator
                   expressions: 'tst1' (type: string), UDFToString(_col0) (type: string)
                   outputColumnNames: _col0, _col1
-                  File Output Operator
-                    compressed: false
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-        Union 5
-            Vertex: Union 5
-
-  Stage: Stage-4
-    Spark
-      Edges:
-        Reducer 7 <- Map 1 (GROUP, 1)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1
-            Map Operator Tree:
-                TableScan
                   Select Operator
                     expressions: _col0 (type: string), _col1 (type: string)
                     outputColumnNames: _col0, _col1
@@ -115,7 +113,17 @@ STAGE PLANS:
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
                         value expressions: _col1 (type: bigint)
-        Reducer 7
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string)
+                    outputColumnNames: _col0, _col1, _col2
+                    File Output Operator
+                      compressed: false
+                      table:
+                          input format: org.apache.hadoop.mapred.TextInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                          name: default.dest2
+        Reducer 4
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
@@ -132,6 +140,8 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.dest1
+        Union 3
+            Vertex: Union 3

   Stage: Stage-3
     Dependency Collection
@@ -146,7 +156,7 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest1

-  Stage: Stage-6
+  Stage: Stage-4
     Stats-Aggr Operator

   Stage: Stage-1
@@ -159,26 +169,8 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest2

-  Stage: Stage-7
-    Stats-Aggr Operator
-
   Stage: Stage-5
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 2
-            Map Operator Tree:
-                TableScan
-                  Select Operator
-                    expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string)
-                    outputColumnNames: _col0, _col1, _col2
-                    File Output Operator
-                      compressed: false
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.dest2
+    Stats-Aggr Operator

 PREHOOK: query: FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1
                          UNION  ALL
diff --git a/ql/src/test/results/clientpositive/spark/union_remove_6.q.out b/ql/src/test/results/clientpositive/spark/union_remove_6.q.out
index 6730010..4cf23b2 100644
--- a/ql/src/test/results/clientpositive/spark/union_remove_6.q.out
+++ b/ql/src/test/results/clientpositive/spark/union_remove_6.q.out
@@ -64,22 +64,20 @@ insert overwrite table outputTbl2 select *
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
-  Stage-4 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-4, Stage-5
+  Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
   Stage-1 depends on stages: Stage-3
-  Stage-5 depends on stages: Stage-2

 STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 4 <- Map 3 (GROUP, 1)
-        Reducer 7 <- Map 6 (GROUP, 1)
-        Union 5 <- Reducer 4 (NONE, 0), Reducer 7 (NONE, 0)
+        Reducer 2 <- Map 1 (GROUP, 1)
+        Reducer 5 <- Map 4 (GROUP, 1)
+        Union 3 <- Reducer 2 (NONE, 0), Reducer 5 (NONE, 0)
 #### A masked pattern was here ####
       Vertices:
-        Map 3
+        Map 1
             Map Operator Tree:
                 TableScan
                   alias: inputtbl1
@@ -96,7 +94,7 @@ STAGE PLANS:
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
                         value expressions: _col1 (type: bigint)
-        Map 6
+        Map 4
             Map Operator Tree:
                 TableScan
                   alias: inputtbl1
@@ -113,7 +111,7 @@ STAGE PLANS:
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
                         value expressions: _col1 (type: bigint)
-        Reducer 4
+        Reducer 2
             Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
@@ -123,13 +121,27 @@ STAGE PLANS:
                 Select Operator
                   expressions: _col0 (type: string), _col1 (type: bigint)
                   outputColumnNames: _col0, _col1
-                  File Output Operator
-                    compressed: false
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-        Reducer 7
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: bigint)
+                    outputColumnNames: _col0, _col1
+                    File Output Operator
+                      compressed: false
+                      table:
+                          input format: org.apache.hadoop.mapred.TextInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                          name: default.outputtbl1
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: bigint)
+                    outputColumnNames: _col0, _col1
+                    File Output Operator
+                      compressed: false
+                      table:
+                          input format: org.apache.hadoop.mapred.TextInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                          name: default.outputtbl2
+        Reducer 5
             Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
@@ -139,22 +151,6 @@ STAGE PLANS:
                 Select Operator
                   expressions: _col0 (type: string), _col1 (type: bigint)
                   outputColumnNames: _col0, _col1
-                  File Output Operator
-                    compressed: false
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-        Union 5
-            Vertex: Union 5
-
-  Stage: Stage-4
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 1
-            Map Operator Tree:
-                TableScan
                   Select Operator
                     expressions: _col0 (type: string), _col1 (type: bigint)
                     outputColumnNames: _col0, _col1
@@ -165,6 +161,18 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.outputtbl1
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: bigint)
+                    outputColumnNames: _col0, _col1
+                    File Output Operator
+                      compressed: false
+                      table:
+                          input format: org.apache.hadoop.mapred.TextInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                          name: default.outputtbl2
+        Union 3
+            Vertex: Union 3

   Stage: Stage-3
     Dependency Collection
@@ -189,24 +197,6 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.outputtbl2

-  Stage: Stage-5
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 2
-            Map Operator Tree:
-                TableScan
-                  Select Operator
-                    expressions: _col0 (type: string), _col1 (type: bigint)
-                    outputColumnNames: _col0, _col1
-                    File Output Operator
-                      compressed: false
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.outputtbl2
-
 PREHOOK: query: FROM (
       SELECT key, count(1) as values from inputTbl1 group by key
       UNION ALL
diff --git a/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out b/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out
index 909378b..3b11010 100644
--- a/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out
@@ -4925,22 +4925,23 @@ TOK_QUERY

 STAGE DEPENDENCIES:
   Stage-2 is a root stage
-  Stage-4 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-4, Stage-5
+  Stage-3 depends on stages: Stage-2
   Stage-1 depends on stages: Stage-3
-  Stage-6 depends on stages: Stage-1
+  Stage-4 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-3
-  Stage-7 depends on stages: Stage-0
-  Stage-5 depends on stages: Stage-2
+  Stage-5 depends on stages: Stage-0

 STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 4 <- Map 3 (GROUP SORT, 1)
+        Reducer 2 <- Map 1 (GROUP SORT, 1)
+        Reducer 3 <- Reducer 2 (GROUP SORT, 1)
+        Reducer 4 <- Reducer 3 (GROUP SORT, 1)
+        Reducer 5 <- Reducer 2 (GROUP SORT, 1)
 #### A masked pattern was here ####
       Vertices:
-        Map 3
+        Map 1
            Map Operator Tree:
                TableScan
                  alias: part
@@ -4997,40 +4998,11 @@ STAGE PLANS:
               name: default.part
             Truncated Path -> Alias:
               /part [part]
-        Reducer 4
+        Reducer 2
             Needs Tagging: false
             Reduce Operator Tree:
               Extract
                 PTF Operator
-                  File Output Operator
-                    compressed: false
-                    GlobalTableId: 0
-#### A masked pattern was here ####
-                    NumFilesPerFileSink: 1
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        properties:
-                          columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11
-                          columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct
-                          escape.delim \
-                          serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-                    TotalFiles: 1
-                    GatherStats: false
-                    MultiFileSpray: false
-
-  Stage: Stage-4
-    Spark
-      Edges:
-        Reducer 5 <- Map 1 (GROUP SORT, 1)
-        Reducer 6 <- Reducer 5 (GROUP SORT, 1)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1
-            Map Operator Tree:
-                TableScan
-                  GatherStats: false
                   Reduce Output Operator
                     key expressions: _col2 (type: string), _col5 (type: int)
                     sort order: ++
@@ -5038,32 +5010,17 @@ STAGE PLANS:
                     tag: -1
                     value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int)
                     auto parallelism: true
-                  Path -> Alias:
-#### A masked pattern was here ####
-                  Path -> Partition:
-#### A masked pattern was here ####
-                      Partition
-                        base file name: -mr-10002
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        properties:
-                          columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11
-                          columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct
-                          escape.delim \
-                          serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                          properties:
-                            columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11
-                            columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct
-                            escape.delim \
-                            serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-                          serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-                  Truncated Path -> Alias:
-#### A masked pattern was here ####
-        Reducer 5
+                  Select Operator
+                    expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double)
+                    outputColumnNames: _col1, _col2, _col5, _col7
+                    Reduce Output Operator
+                      key expressions: _col2 (type: string), _col1 (type: string)
+                      sort order: ++
+                      Map-reduce partition columns: _col2 (type: string)
+                      tag: -1
+                      value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double)
+                      auto parallelism: true
+        Reducer 3
             Needs Tagging: false
             Reduce Operator Tree:
               Extract
@@ -5075,7 +5032,7 @@ STAGE PLANS:
                     tag: -1
                     value expressions: _wcol0 (type: bigint), _col1 (type: string), _col2 (type: string), _col5 (type: int)
                     auto parallelism: true
-        Reducer 6
+        Reducer 4
             Needs Tagging: false
             Reduce Operator Tree:
               Extract
@@ -5108,6 +5065,39 @@ STAGE PLANS:
                     TotalFiles: 1
                     GatherStats: true
                     MultiFileSpray: false
+        Reducer 5
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Extract
+                PTF Operator
+                  Select Operator
+                    expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                    File Output Operator
+                      compressed: false
+                      GlobalTableId: 1
+#### A masked pattern was here ####
+                      NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+                      table:
+                          input format: org.apache.hadoop.mapred.TextInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          properties:
+                            bucket_count -1
+                            columns p_mfgr,p_name,p_size,r,dr,s
+                            columns.comments 
+                            columns.types string:string:int:int:int:double
+#### A masked pattern was here ####
+                            name default.part_4
+                            serialization.ddl struct part_4 { string p_mfgr, string p_name, i32 p_size, i32 r, i32 dr, double s}
+                            serialization.format 1
+                            serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                          name: default.part_4
+                      TotalFiles: 1
+                      GatherStats: true
+                      MultiFileSpray: false

   Stage: Stage-3
     Dependency Collection
@@ -5134,7 +5124,7 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.part_5

-  Stage: Stage-6
+  Stage: Stage-4
     Stats-Aggr Operator
 #### A masked pattern was here ####

@@ -5160,88 +5150,9 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.part_4

-  Stage: Stage-7
-    Stats-Aggr Operator
-#### A masked pattern was here ####
-
   Stage: Stage-5
-    Spark
-      Edges:
-        Reducer 7 <- Map 2 (GROUP SORT, 1)
-#### A masked pattern was here ####
-      Vertices:
-        Map 2
-            Map Operator Tree:
-                TableScan
-                  GatherStats: false
-                  Select Operator
-                    expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double)
-                    outputColumnNames: _col1, _col2, _col5, _col7
-                    Reduce Output Operator
-                      key expressions: _col2 (type: string), _col1 (type: string)
-                      sort order: ++
-                      Map-reduce partition columns: _col2 (type: string)
-                      tag: -1
-                      value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double)
-                      auto parallelism: true
-                  Path -> Alias:
-#### A masked pattern was here ####
-                  Path -> Partition:
-#### A masked pattern was here ####
-                      Partition
-                        base file name: -mr-10002
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        properties:
-                          columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11
-                          columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct
-                          escape.delim \
-                          serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                          properties:
-                            columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11
-                            columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct
-                            escape.delim \
-                            serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-                          serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-                  Truncated Path -> Alias:
-#### A masked pattern was here ####
-        Reducer 7
-            Needs Tagging: false
-            Reduce Operator Tree:
-              Extract
-                PTF Operator
-                  Select Operator
-                    expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double)
-                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                    File Output Operator
-                      compressed: false
-                      GlobalTableId: 1
-#### A masked pattern was here ####
-                      NumFilesPerFileSink: 1
-#### A masked pattern was here ####
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          properties:
-                            bucket_count -1
-                            columns p_mfgr,p_name,p_size,r,dr,s
-                            columns.comments 
-                            columns.types string:string:int:int:int:double
-#### A masked pattern was here ####
-                            name default.part_4
-                            serialization.ddl struct part_4 { string p_mfgr, string p_name, i32 p_size, i32 r, i32 dr, double s}
-                            serialization.format 1
-                            serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+    Stats-Aggr Operator
 #### A masked pattern was here ####
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.part_4
-                      TotalFiles: 1
-                      GatherStats: true
-                      MultiFileSpray: false

 PREHOOK: query: from noop(on part
 partition by p_mfgr