diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index cb59560..555343e 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -507,7 +507,6 @@ HIVECONVERTJOINNOCONDITIONALTASK("hive.auto.convert.join.noconditionaltask", true), HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD("hive.auto.convert.join.noconditionaltask.size", 10000000L), - HIVEOPTIMIZEMAPJOINFOLLOWEDBYMR("hive.optimize.mapjoin.mapreduce", false), HIVESKEWJOINKEY("hive.skewjoin.key", 100000), HIVESKEWJOINMAPJOINNUMMAPTASK("hive.skewjoin.mapjoin.map.tasks", 10000), HIVESKEWJOINMAPJOINMINSPLIT("hive.skewjoin.mapjoin.min.split", 33554432L), //32M diff --git conf/hive-default.xml.template conf/hive-default.xml.template index e0b7f5c..f01e715 100644 --- conf/hive-default.xml.template +++ conf/hive-default.xml.template @@ -860,16 +860,6 @@ - hive.optimize.mapjoin.mapreduce - false - If hive.auto.convert.join is off, this parameter does not take - affect. If it is on, and if there are map-join jobs followed by a map-reduce - job (for e.g a group by), each map-only job is merged with the following - map-reduce job. - - - - hive.script.auto.progress false Whether Hive Tranform/Map/Reduce Clause should automatically send progress information to TaskTracker to avoid the task getting killed because of inactivity. Hive sends progress information when the script is outputting to stderr. This option removes the need of periodically producing stderr messages, but users should be cautious because this may prevent infinite loops in the scripts to be killed by TaskTracker. diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java index 66b84ff..42b10a8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java @@ -1,9 +1,4 @@ /** - <<<<<<< HEAD - ======= - * Copyright 2010 The Apache Software Foundation - * - >>>>>>> HIVE-1402 [jira] Add parallel ORDER BY to Hive * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -30,30 +25,30 @@ public class OperatorUtils { public static Set findOperators(Operator start, Class clazz) { - return findOperator(start, clazz, new HashSet()); + return findOperators(start, clazz, new HashSet()); } public static T findSingleOperator(Operator start, Class clazz) { - Set found = findOperator(start, clazz, new HashSet()); + Set found = findOperators(start, clazz, new HashSet()); return found.size() == 1 ? 
found.iterator().next() : null; } public static Set findOperators(Collection> starts, Class clazz) { Set found = new HashSet(); for (Operator start : starts) { - findOperator(start, clazz, found); + findOperators(start, clazz, found); } return found; } @SuppressWarnings("unchecked") - private static Set findOperator(Operator start, Class clazz, Set found) { + private static Set findOperators(Operator start, Class clazz, Set found) { if (clazz.isInstance(start)) { found.add((T) start); } if (start.getChildOperators() != null) { for (Operator child : start.getChildOperators()) { - findOperator(child, clazz, found); + findOperators(child, clazz, found); } } return found; diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java index bf224e0..0408993 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java @@ -25,6 +25,7 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.Properties; import java.util.Set; @@ -62,7 +63,6 @@ import org.apache.hadoop.hive.ql.plan.FetchWork; import org.apache.hadoop.hive.ql.plan.FileSinkDesc; import org.apache.hadoop.hive.ql.plan.FilterDesc.sampleDesc; -import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.MapredLocalWork; import org.apache.hadoop.hive.ql.plan.MapredWork; @@ -987,6 +987,77 @@ static boolean hasBranchFinished(Object... children) { return true; } + + + /** + * Replace the Map-side operator tree associated with targetAlias in + * target with the Map-side operator tree associated with sourceAlias in source. + * @param sourceAlias + * @param targetAlias + * @param source + * @param target + */ + public static void replaceMapWork(String sourceAlias, String targetAlias, + MapWork source, MapWork target) { + Map> sourcePathToAliases = source.getPathToAliases(); + Map sourcePathToPartitionInfo = source.getPathToPartitionInfo(); + Map> sourceAliasToWork = source.getAliasToWork(); + Map sourceAliasToPartnInfo = source.getAliasToPartnInfo(); + + Map> targetPathToAliases = target.getPathToAliases(); + Map targetPathToPartitionInfo = target.getPathToPartitionInfo(); + Map> targetAliasToWork = target.getAliasToWork(); + Map targetAliasToPartnInfo = target.getAliasToPartnInfo(); + + if (!sourceAliasToWork.containsKey(sourceAlias) || + !targetAliasToWork.containsKey(targetAlias)) { + // Nothing to do if there is no operator tree associated with + // sourceAlias in source or no operator tree associated + // with targetAlias in target. + return; + } + + if (sourceAliasToWork.size() > 1) { + // If there are multiple aliases in source, we do not know + // how to merge.
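// NOTE (editor's illustration, not part of the patch; all names below are hypothetical).
// Suppose the map-join task (source) scans alias "bigtbl" from /warehouse/bigtbl, and the
// child MR task (target) reads the map-join's output directory under alias "$INTNAME":
//   target.pathToAliases = {"/tmp/mj-output" -> ["$INTNAME"]}
// After replaceMapWork("bigtbl", "$INTNAME", source, target):
//   target.pathToAliases = {"/warehouse/bigtbl" -> ["bigtbl"]}
//   target.aliasToWork   = {"bigtbl" -> the map-join operator tree}
// i.e. the child job now reads the big table directly and runs the map-join operators in
// its own map phase.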
+ return; + } + + // Remove unnecessary information from target + targetAliasToWork.remove(targetAlias); + targetAliasToPartnInfo.remove(targetAlias); + List pathsToRemove = new ArrayList(); + for (Entry> entry: targetPathToAliases.entrySet()) { + ArrayList aliases = entry.getValue(); + aliases.remove(targetAlias); + if (aliases.isEmpty()) { + pathsToRemove.add(entry.getKey()); + } + } + for (String pathToRemove: pathsToRemove) { + targetPathToAliases.remove(pathToRemove); + targetPathToPartitionInfo.remove(pathToRemove); + } + + // Add new information from source to target + targetAliasToWork.put(sourceAlias, sourceAliasToWork.get(sourceAlias)); + targetAliasToPartnInfo.putAll(sourceAliasToPartnInfo); + targetPathToPartitionInfo.putAll(sourcePathToPartitionInfo); + List pathsToAdd = new ArrayList(); + for (Entry> entry: sourcePathToAliases.entrySet()) { + ArrayList aliases = entry.getValue(); + if (aliases.contains(sourceAlias)) { + pathsToAdd.add(entry.getKey()); + } + } + for (String pathToAdd: pathsToAdd) { + if (!targetPathToAliases.containsKey(pathToAdd)) { + targetPathToAliases.put(pathToAdd, new ArrayList()); + } + targetPathToAliases.get(pathToAdd).add(sourceAlias); + } + } + private GenMapRedUtils() { // prevent instantiation } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/QueryPlanTreeTransformation.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/QueryPlanTreeTransformation.java index f704ec1..7b0f5a1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/QueryPlanTreeTransformation.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/QueryPlanTreeTransformation.java @@ -55,8 +55,8 @@ private static void setNewTag(IntraQueryCorrelation correlation, throws SemanticException { int newTag = bottomRSToNewTag.get(rsop); int oldTag = rsop.getConf().getTag(); - // if this child of dispatcher does not use tag, we just set the oldTag to 0; if (oldTag == -1) { + // if this child of DemuxOperator does not use tag, we just set the oldTag to 0. oldTag = 0; } Operator child = CorrelationUtilities.getSingleChild(rsop, true); @@ -68,7 +68,8 @@ private static void setNewTag(IntraQueryCorrelation correlation, rsop.getConf().setTag(newTag); } - /** Based on the correlation, we transform the query plan tree (operator tree). + /** + * Based on the correlation, we transform the query plan tree (operator tree). * In here, we first create DemuxOperator and all bottom ReduceSinkOperators * (bottom means near TableScanOperaotr) in the correlation will be be * the parents of the DemuxOperaotr. 
We also reassign tags to those diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java index d532bb1..849605d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java @@ -26,6 +26,7 @@ import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.ObjectPair; @@ -35,6 +36,7 @@ import org.apache.hadoop.hive.ql.exec.FileSinkOperator; import org.apache.hadoop.hive.ql.exec.JoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.OperatorUtils; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.TaskFactory; @@ -54,7 +56,6 @@ import org.apache.hadoop.hive.ql.plan.MapredLocalWork; import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; -import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.hive.ql.plan.ReduceWork; /* @@ -109,182 +110,76 @@ public CommonJoinTaskDispatcher(PhysicalContext context) { super(context); } - // Get the position of the big table for this join operator and the given alias - private int getPosition(MapWork work, Operator joinOp, - String alias) { - Operator parentOp = work.getAliasToWork().get(alias); - - // reduceSinkOperator's child is null, but joinOperator's parents is reduceSink - while ((parentOp.getChildOperators() != null) && - (!parentOp.getChildOperators().isEmpty())) { - parentOp = parentOp.getChildOperators().get(0); - } - - return joinOp.getParentOperators().indexOf(parentOp); - } - - /* - * A task and its child task has been converted from join to mapjoin. - * See if the two tasks can be merged. + /** + * Calculate the total size of local tables in localWork. + * @param localWork + * @return the total size of local tables, or -1 if the total + * size is unknown.
*/ - private void mergeMapJoinTaskWithChildMapJoinTask(MapRedTask task, Configuration conf) { - MapRedTask childTask = (MapRedTask) task.getChildTasks().get(0); - MapWork work = task.getWork().getMapWork(); - MapredLocalWork localWork = work.getMapLocalWork(); - MapWork childWork = childTask.getWork().getMapWork(); - MapredLocalWork childLocalWork = childWork.getMapLocalWork(); - - // Can this be merged - Map> aliasToWork = work.getAliasToWork(); - if (aliasToWork.size() > 1) { - return; - } - - Operator op = aliasToWork.values().iterator().next(); - while (op.getChildOperators() != null) { - // Dont perform this optimization for multi-table inserts - if (op.getChildOperators().size() > 1) { - return; - } - op = op.getChildOperators().get(0); - } - - if (!(op instanceof FileSinkOperator)) { - return; - } - - FileSinkOperator fop = (FileSinkOperator) op; - String workDir = fop.getConf().getDirName(); - - Map> childPathToAliases = childWork.getPathToAliases(); - if (childPathToAliases.size() > 1) { - return; - } - - // The filesink writes to a different directory - if (!childPathToAliases.keySet().iterator().next().equals(workDir)) { - return; - } - - // Either of them should not be bucketed - if ((localWork.getBucketMapjoinContext() != null) || - (childLocalWork.getBucketMapjoinContext() != null)) { - return; - } - - // Merge the trees - if (childWork.getAliasToWork().size() > 1) { - return; - } - - long mapJoinSize = HiveConf.getLongVar(conf, - HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD); + private long calculateLocalTableTotalSize(MapredLocalWork localWork) { long localTableTotalSize = 0; - for (String alias : localWork.getAliasToWork().keySet()) { - Long tabSize = aliasToSize.get(alias); - if (tabSize == null) { - /* - * if the size is unavailable, we need to assume a size 1 greater than mapJoinSize - * this implies that merge cannot happen so we can return. - */ - return; - } - localTableTotalSize += tabSize; + if (localWork == null) { + return localTableTotalSize; } - - for (String alias : childLocalWork.getAliasToWork().keySet()) { + for (String alias : localWork.getAliasToWork().keySet()) { Long tabSize = aliasToSize.get(alias); if (tabSize == null) { - /* - * if the size is unavailable, we need to assume a size 1 greater than mapJoinSize - * this implies that merge cannot happen so we can return. - */ - return; + // if the size is unavailable, we need to assume a size 1 greater than + // localTableTotalSizeLimit. This implies that merge cannot happen, + // so we will return -1.
+ return -1; } localTableTotalSize += tabSize; - if (localTableTotalSize > mapJoinSize) { - return; - } - } - - // Merge the 2 trees - remove the FileSinkOperator from the first tree pass it to the - // top of the second - Operator childAliasOp = - childWork.getAliasToWork().values().iterator().next(); - if (fop.getParentOperators().size() > 1) { - return; } - Operator parentFOp = fop.getParentOperators().get(0); - // remove the unnecessary TableScan - if (childAliasOp instanceof TableScanOperator) { - TableScanOperator tso = (TableScanOperator)childAliasOp; - if (tso.getNumChild() != 1) { - // shouldn't happen - return; - } - childAliasOp = tso.getChildOperators().get(0); - childAliasOp.replaceParent(tso, parentFOp); - } else { - childAliasOp.setParentOperators(Utilities.makeList(parentFOp)); - } - parentFOp.replaceChild(fop, childAliasOp); + return localTableTotalSize; + } - work.getAliasToPartnInfo().putAll(childWork.getAliasToPartnInfo()); - for (Map.Entry childWorkEntry : childWork.getPathToPartitionInfo() .entrySet()) { - if (childWork.getAliasToPartnInfo().containsValue(childWorkEntry.getKey())) { - work.getPathToPartitionInfo().put(childWorkEntry.getKey(), childWorkEntry.getValue()); + /** + * Check if the total size of local tables will be under + * the limit after we merge the given local works. + * The limit of the total size of local tables is defined by + * HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD. + * @param conf + * @param localWorks + * @return true if the total size of local tables is under the limit + * after the merge; false otherwise. + */ + private boolean isLocalTableTotalSizeUnderLimitAfterMerge( + Configuration conf, + MapredLocalWork... localWorks) { + final long localTableTotalSizeLimit = HiveConf.getLongVar(conf, + HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD); + long localTableTotalSize = 0; + for (int i = 0; i < localWorks.length; i++) { + final long localWorkTableTotalSize = calculateLocalTableTotalSize(localWorks[i]); + if (localWorkTableTotalSize < 0) { + // The total size of local tables in localWorks[i] is unknown. + return false; } + localTableTotalSize += localWorkTableTotalSize; } - localWork.getAliasToFetchWork().putAll(childLocalWork.getAliasToFetchWork()); - localWork.getAliasToWork().putAll(childLocalWork.getAliasToWork()); - - // remove the child task - List> oldChildTasks = childTask.getChildTasks(); - task.setChildTasks(oldChildTasks); - if (oldChildTasks != null) { - for (Task oldChildTask : oldChildTasks) { - oldChildTask.getParentTasks().remove(childTask); - oldChildTask.getParentTasks().add(task); - } + if (localTableTotalSize > localTableTotalSizeLimit) { + // The total size of local tables after we merge localWorks + // is larger than the limit set by + // HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD. + return false; } - boolean convertToSingleJob = HiveConf.getBoolVar(conf, - HiveConf.ConfVars.HIVEOPTIMIZEMAPJOINFOLLOWEDBYMR); - if (convertToSingleJob) { - copyReducerConf(task, childTask); - } + return true; } - /** - * Copy reducer configuration if the childTask also has a reducer.
- * - * @param task - * @param childTask - */ - private void copyReducerConf(MapRedTask task, MapRedTask childTask) { - MapredWork mrChildWork = childTask.getWork(); - ReduceWork childWork = childTask.getWork().getReduceWork(); - if (childWork == null) { - return; - } + // Get the position of the big table for this join operator and the given alias + private int getPosition(MapWork work, Operator joinOp, + String alias) { + Operator parentOp = work.getAliasToWork().get(alias); - Operator childReducer = childWork.getReducer(); - MapredWork work = task.getWork(); - if (childReducer == null) { - return; + // reduceSinkOperator's child is null, but joinOperator's parents is reduceSink + while ((parentOp.getChildOperators() != null) && + (!parentOp.getChildOperators().isEmpty())) { + parentOp = parentOp.getChildOperators().get(0); } - ReduceWork rWork = new ReduceWork(); - work.setReduceWork(rWork); - rWork.setReducer(childReducer); - rWork.setNumReduceTasks(childWork.getNumReduceTasks()); - work.getMapWork().setJoinTree(mrChildWork.getMapWork().getJoinTree()); - rWork.setNeedsTagging(childWork.getNeedsTagging()); - - // Make sure the key configuration is correct, clear and regenerate. - rWork.getTagToValueDesc().clear(); - GenMapRedUtils.setKeyAndValueDescForTaskTree(task); + return joinOp.getParentOperators().indexOf(parentOp); } // create map join task and set big table as bigTablePosition @@ -305,129 +200,165 @@ private void copyReducerConf(MapRedTask task, MapRedTask childTask) { * A task and its child task has been converted from join to mapjoin. * See if the two tasks can be merged. */ - private void mergeMapJoinTaskWithMapReduceTask(MapRedTask mapJoinTask, Configuration conf) { + private void mergeMapJoinTaskIntoItsChildMapRedTask(MapRedTask mapJoinTask, Configuration conf) + throws SemanticException{ + // Step 1: Check if mapJoinTask has a single child. + // If so, check if we can merge mapJoinTask into that child. if (mapJoinTask.getChildTasks() == null || mapJoinTask.getChildTasks().size() > 1) { // No child-task to merge, nothing to do or there are more than one // child-tasks in which case we don't want to do anything. return; } - Task firstChildTask = mapJoinTask.getChildTasks().get(0); - if (!(firstChildTask instanceof MapRedTask)) { - // Nothing to do if it is not a mapreduce task. - return; - } - MapRedTask childTask = (MapRedTask) firstChildTask; - MapWork mapJoinWork = mapJoinTask.getWork().getMapWork(); - MapredWork childWork = childTask.getWork(); - if (childWork.getReduceWork() == null) { - // Not a MR job, nothing to merge. - return; - } - // Can this be merged - Map> aliasToWork = mapJoinWork.getAliasToWork(); - if (aliasToWork.size() > 1) { - return; - } - Map> childPathToAliases = childWork.getMapWork().getPathToAliases(); - if (childPathToAliases.size() > 1) { + Task childTask = mapJoinTask.getChildTasks().get(0); + if (!(childTask instanceof MapRedTask)) { + // Nothing to do if it is not a MapReduce task. return; } - // Locate leaf operator of the map-join task. Start by initializing leaf - // operator to be root operator. 
- Operator mapJoinLeafOperator = aliasToWork.values().iterator().next(); - while (mapJoinLeafOperator.getChildOperators() != null) { - // Dont perform this optimization for multi-table inserts - if (mapJoinLeafOperator.getChildOperators().size() > 1) { - return; - } - mapJoinLeafOperator = mapJoinLeafOperator.getChildOperators().get(0); - } + MapRedTask childMapRedTask = (MapRedTask) childTask; + MapWork mapJoinMapWork = mapJoinTask.getWork().getMapWork(); + MapWork childMapWork = childMapRedTask.getWork().getMapWork(); - assert (mapJoinLeafOperator instanceof FileSinkOperator); - if (!(mapJoinLeafOperator instanceof FileSinkOperator)) { - // Sanity check, shouldn't happen. + Map> mapJoinAliasToWork = + mapJoinMapWork.getAliasToWork(); + if (mapJoinAliasToWork.size() > 1) { + // Do not merge if the MapredWork of MapJoin has multiple input aliases. return; } - FileSinkOperator mapJoinTaskFileSinkOperator = (FileSinkOperator) mapJoinLeafOperator; - - // The filesink writes to a different directory - String workDir = mapJoinTaskFileSinkOperator.getConf().getDirName(); - if (!childPathToAliases.keySet().iterator().next().equals(workDir)) { + Entry> mapJoinAliasToWorkEntry = + mapJoinAliasToWork.entrySet().iterator().next(); + String mapJoinAlias = mapJoinAliasToWorkEntry.getKey(); + TableScanOperator mapJoinTaskTableScanOperator = + OperatorUtils.findSingleOperator( + mapJoinAliasToWorkEntry.getValue(), TableScanOperator.class); + if (mapJoinTaskTableScanOperator == null) { + throw new SemanticException("Expected a " + TableScanOperator.getOperatorName() + + " operator as the work associated with alias " + mapJoinAlias + + ". Found a " + mapJoinAliasToWork.get(mapJoinAlias).getName() + " operator."); + } + FileSinkOperator mapJoinTaskFileSinkOperator = + OperatorUtils.findSingleOperator( + mapJoinTaskTableScanOperator, FileSinkOperator.class); + if (mapJoinTaskFileSinkOperator == null) { + throw new SemanticException("Cannot find the " + FileSinkOperator.getOperatorName() + + " operator as the last operator of the MapJoin Task."); + } + + // The mapJoinTaskFileSinkOperator writes to a different directory + String childMRPath = mapJoinTaskFileSinkOperator.getConf().getDirName(); + List childMRAliases = childMapWork.getPathToAliases().get(childMRPath); + if (childMRAliases == null || childMRAliases.size() != 1) { return; } + String childMRAlias = childMRAliases.get(0); - MapredLocalWork mapJoinLocalWork = mapJoinWork.getMapLocalWork(); - MapredLocalWork childLocalWork = childWork.getMapWork().getMapLocalWork(); + MapredLocalWork mapJoinLocalWork = mapJoinMapWork.getMapLocalWork(); + MapredLocalWork childLocalWork = childMapWork.getMapLocalWork(); - // Either of them should not be bucketed if ((mapJoinLocalWork != null && mapJoinLocalWork.getBucketMapjoinContext() != null) || (childLocalWork != null && childLocalWork.getBucketMapjoinContext() != null)) { + // Right now, we do not handle the case that either of them is bucketed. + // We should relax this constraint with a follow-up jira. return; } - if (childWork.getMapWork().getAliasToWork().size() > 1) { + // We need to check if the total size of local tables is under the limit. + // Here, we are using a strong condition, which is the total size of + // local tables used by all input paths. Actually, we can relax this condition + // to check the total size of local tables for every input path.
+ // Example: + // UNION_ALL + // / \ + // / \ + // / \ + // / \ + // MapJoin1 MapJoin2 + // / | \ / | \ + // / | \ / | \ + // Big1 S1 S2 Big2 S3 S4 + // In this case, we have two MapJoins, MapJoin1 and MapJoin2. Big1 and Big2 are two + // big tables, and S1, S2, S3, and S4 are four small tables. Hash tables of S1 and S2 + // will only be used by Map tasks processing Big1. Hash tables of S3 and S4 will only + // be used by Map tasks processing Big2. If Big1!=Big2, we should only check if the size + // of S1 + S2 is under the limit, and if the size of S3 + S4 is under the limit. + // But, right now, we are checking whether the size of S1 + S2 + S3 + S4 is under the limit. + // If Big1=Big2, we will only scan a path once. So, MapJoin1 and MapJoin2 will be executed + // in the same Map task. In this case, we need to make sure the size of S1 + S2 + S3 + S4 + // is under the limit. + if (!isLocalTableTotalSizeUnderLimitAfterMerge(conf, mapJoinLocalWork, childLocalWork)) { + // The total size of local tables may not be under + // the limit after we merge mapJoinLocalWork and childLocalWork. + // Do not merge. return; } - Operator childAliasOp = - childWork.getMapWork().getAliasToWork().values().iterator().next(); - if (mapJoinTaskFileSinkOperator.getParentOperators().size() > 1) { - return; + TableScanOperator childMRTaskTableScanOperator = + OperatorUtils.findSingleOperator( + childMapWork.getAliasToWork().get(childMRAlias), TableScanOperator.class); + if (childMRTaskTableScanOperator == null) { + throw new SemanticException("Expected a " + TableScanOperator.getOperatorName() + + " operator as the work associated with alias " + childMRAlias + + ". Found a " + childMapWork.getAliasToWork().get(childMRAlias).getName() + " operator."); } - // remove the unnecessary TableScan - if (childAliasOp instanceof TableScanOperator) { - TableScanOperator tso = (TableScanOperator)childAliasOp; - if (tso.getNumChild() != 1) { - // shouldn't happen - return; - } - childAliasOp = tso.getChildOperators().get(0); - childAliasOp.getParentOperators().remove(tso); + List> parentsInMapJoinTask = + mapJoinTaskFileSinkOperator.getParentOperators(); + List> childrenInChildMRTask = + childMRTaskTableScanOperator.getChildOperators(); + if (parentsInMapJoinTask.size() > 1 || childrenInChildMRTask.size() > 1) { + // Do not merge if we do not know how to connect two operator trees. + return; } - // Merge the 2 trees - remove the FileSinkOperator from the first tree pass it to the - // top of the second - Operator parentFOp = mapJoinTaskFileSinkOperator - .getParentOperators().get(0); - parentFOp.getChildOperators().remove(mapJoinTaskFileSinkOperator); - parentFOp.getChildOperators().add(childAliasOp); - List> parentOps = - new ArrayList>(); - parentOps.add(parentFOp); - childAliasOp.setParentOperators(parentOps); - - mapJoinWork.getAliasToPartnInfo().putAll(childWork.getMapWork().getAliasToPartnInfo()); - for (Map.Entry childWorkEntry : childWork.getMapWork().getPathToPartitionInfo() .entrySet()) { - if (childWork.getMapWork().getAliasToPartnInfo().containsValue(childWorkEntry.getKey())) { - mapJoinWork.getPathToPartitionInfo() .put(childWorkEntry.getKey(), childWorkEntry.getValue()); - } - } + // Step 2: Merge mapJoinTask into the Map-side of its child. + // Step 2.1: Connect the operator trees of two MapRedTasks.
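// NOTE (editor's illustration, not part of the patch): the splice below rewires
//   mapJoinTask:     TS(big table) -> ... -> MapJoin -> ... -> FileSink(tmp dir)
//   childMapRedTask: TS(tmp dir) -> ... -> ReduceSink -> (reduce side)
// into
//   childMapRedTask: TS(big table) -> ... -> MapJoin -> ... -> ReduceSink -> (reduce side)
// so the FileSinkOperator and the child's TableScanOperator both drop out of the
// merged map-side operator tree.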
+ Operator parentInMapJoinTask = parentsInMapJoinTask.get(0); + Operator childInChildMRTask = childrenInChildMRTask.get(0); + parentInMapJoinTask.replaceChild(mapJoinTaskFileSinkOperator, childInChildMRTask); + childInChildMRTask.replaceParent(childMRTaskTableScanOperator, parentInMapJoinTask); - // Fill up stuff in local work - if (mapJoinLocalWork != null && childLocalWork != null) { - mapJoinLocalWork.getAliasToFetchWork().putAll(childLocalWork.getAliasToFetchWork()); - mapJoinLocalWork.getAliasToWork().putAll(childLocalWork.getAliasToWork()); - } + // Step 2.2: Replace the corresponding part childMRWork's MapWork. + GenMapRedUtils.replaceMapWork(mapJoinAlias, childMRAlias, mapJoinMapWork, childMapWork); - // remove the child task - List> oldChildTasks = childTask.getChildTasks(); - mapJoinTask.setChildTasks(oldChildTasks); - if (oldChildTasks != null) { - for (Task oldChildTask : oldChildTasks) { - oldChildTask.getParentTasks().remove(childTask); - oldChildTask.getParentTasks().add(mapJoinTask); + // Step 2.3: Fill up stuff in local work + if (mapJoinLocalWork != null) { + if (childLocalWork == null) { + childMapWork.setMapLocalWork(mapJoinLocalWork); + } else { + childLocalWork.getAliasToFetchWork().putAll(mapJoinLocalWork.getAliasToFetchWork()); + childLocalWork.getAliasToWork().putAll(mapJoinLocalWork.getAliasToWork()); } } - // Copy the reducer conf. - copyReducerConf(mapJoinTask, childTask); + // Step 2.4: Remove this MapJoin task + List> parentTasks = mapJoinTask.getParentTasks(); + mapJoinTask.setParentTasks(null); + mapJoinTask.setChildTasks(null); + childMapRedTask.getParentTasks().remove(mapJoinTask); + if (parentTasks != null) { + childMapRedTask.getParentTasks().addAll(parentTasks); + for (Task parentTask : parentTasks) { + parentTask.getChildTasks().remove(mapJoinTask); + if (!parentTask.getChildTasks().contains(childMapRedTask)) { + parentTask.getChildTasks().add(childMapRedTask); + } + } + } else { + if (physicalContext.getRootTasks().contains(mapJoinTask)) { + physicalContext.removeFromRootTask(mapJoinTask); + if (childMapRedTask.getParentTasks() != null && + childMapRedTask.getParentTasks().size() == 0 && + !physicalContext.getRootTasks().contains(childMapRedTask)) { + physicalContext.addToRootTask(childMapRedTask); + } + } + } + if (childMapRedTask.getParentTasks().size() == 0) { + childMapRedTask.setParentTasks(null); + } } public static boolean cannotConvert(String bigTableAlias, @@ -557,20 +488,7 @@ public static boolean cannotConvert(String bigTableAlias, // Can this task be merged with the child task. This can happen if a big table is being // joined with multiple small tables on different keys if ((newTask.getChildTasks() != null) && (newTask.getChildTasks().size() == 1)) { - if (newTask.getChildTasks().get(0).getTaskTag() == Task.MAPJOIN_ONLY_NOBACKUP) { - // Merging two map-join tasks - mergeMapJoinTaskWithChildMapJoinTask(newTask, conf); - } - - // Converted the join operator into a map-join. Now see if it can - // be merged into the following map-reduce job. - boolean convertToSingleJob = HiveConf.getBoolVar(conf, - HiveConf.ConfVars.HIVEOPTIMIZEMAPJOINFOLLOWEDBYMR); - if (convertToSingleJob) { - // Try merging a map-join task with a mapreduce job to have a - // single job. 
- mergeMapJoinTaskWithMapReduceTask(newTask, conf); - } + mergeMapJoinTaskIntoItsChildMapRedTask(newTask, conf); } return newTask; diff --git ql/src/test/queries/clientpositive/auto_join33.q ql/src/test/queries/clientpositive/auto_join33.q deleted file mode 100644 index 5c85842..0000000 --- ql/src/test/queries/clientpositive/auto_join33.q +++ /dev/null @@ -1,16 +0,0 @@ -set hive.auto.convert.join=true; -set hive.optimize.mapjoin.mapreduce=true; - --- empty tables -create table studenttab10k (name string, age int, gpa double); -create table votertab10k (name string, age int, registration string, contributions float); - -explain select s.name, count(distinct registration) -from studenttab10k s join votertab10k v -on (s.name = v.name) -group by s.name; - -select s.name, count(distinct registration) -from studenttab10k s join votertab10k v -on (s.name = v.name) -group by s.name; diff --git ql/src/test/queries/clientpositive/correlationoptimizer1.q ql/src/test/queries/clientpositive/correlationoptimizer1.q index 2adf855..b3fd3f7 100644 --- ql/src/test/queries/clientpositive/correlationoptimizer1.q +++ ql/src/test/queries/clientpositive/correlationoptimizer1.q @@ -33,23 +33,6 @@ set hive.optimize.correlation=true; -- Enable hive.auto.convert.join. -- Correlation Optimizer will detect that the join will be converted to a Map-join, -- so it will not try to optimize this query. -EXPLAIN -SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) -FROM (SELECT x.key AS key, count(1) AS cnt - FROM src1 x JOIN src y ON (x.key = y.key) - GROUP BY x.key) tmp; - -SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) -FROM (SELECT x.key AS key, count(1) AS cnt - FROM src1 x JOIN src y ON (x.key = y.key) - GROUP BY x.key) tmp; - -set hive.auto.convert.join=true; -set hive.optimize.mapjoin.mapreduce=true; -set hive.optimize.correlation=true; --- Enable hive.auto.convert.join. --- Correlation Optimizer will detect that the join will be converted to a Map-join, --- so it will not try to optimize this query. -- We should generate 1 MR job for subquery tmp. EXPLAIN SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) diff --git ql/src/test/queries/clientpositive/correlationoptimizer3.q ql/src/test/queries/clientpositive/correlationoptimizer3.q index fcbb764..89b7c15 100644 --- ql/src/test/queries/clientpositive/correlationoptimizer3.q +++ ql/src/test/queries/clientpositive/correlationoptimizer3.q @@ -36,7 +36,6 @@ FROM (SELECT b.key AS key, b.cnt AS cnt, d.value AS value set hive.optimize.correlation=true; set hive.auto.convert.join=true; -set hive.optimize.mapjoin.mapreduce=true; -- Enable hive.auto.convert.join. EXPLAIN SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)), SUM(HASH(tmp.value)) @@ -79,10 +78,9 @@ FROM (SELECT d.key AS key, d.cnt AS cnt, b.value as value FROM (SELECT x.key, x.value FROM src1 x JOIN src y ON (x.key = y.key)) b JOIN (SELECT x.key, count(1) AS cnt FROM src1 x JOIN src y ON (x.key = y.key) group by x.key) d ON b.key = d.key) tmp; - + set hive.optimize.correlation=true; set hive.auto.convert.join=true; -set hive.optimize.mapjoin.mapreduce=true; -- Enable hive.auto.convert.join. 
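-- Editor's note (annotation, not part of the original test): after this patch, a join
-- converted to a map-join is merged into its child map-reduce job automatically whenever
-- the total size of its small tables stays under hive.auto.convert.join.noconditionaltask.size,
-- which is why these tests no longer set the removed hive.optimize.mapjoin.mapreduce flag.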
EXPLAIN SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)), SUM(HASH(tmp.value)) diff --git ql/src/test/queries/clientpositive/correlationoptimizer4.q ql/src/test/queries/clientpositive/correlationoptimizer4.q index 0e84cb7..70fcdfc 100644 --- ql/src/test/queries/clientpositive/correlationoptimizer4.q +++ ql/src/test/queries/clientpositive/correlationoptimizer4.q @@ -33,10 +33,9 @@ SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT y.key AS key, count(1) AS cnt FROM T2 x JOIN T1 y ON (x.key = y.key) JOIN T3 z ON (y.key = z.key) GROUP BY y.key) tmp; - + set hive.optimize.correlation=true; set hive.auto.convert.join=true; -set hive.optimize.mapjoin.mapreduce=true; -- Enable hive.auto.convert.join. EXPLAIN SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) diff --git ql/src/test/queries/clientpositive/correlationoptimizer5.q ql/src/test/queries/clientpositive/correlationoptimizer5.q index 1900f5d..ac836c0 100644 --- ql/src/test/queries/clientpositive/correlationoptimizer5.q +++ ql/src/test/queries/clientpositive/correlationoptimizer5.q @@ -52,7 +52,6 @@ ON b.key = d.key; set hive.optimize.correlation=true; set hive.auto.convert.join=true; -set hive.optimize.mapjoin.mapreduce=true; set hive.auto.convert.join.noconditionaltask.size=10000000000; -- Enable hive.auto.convert.join. EXPLAIN diff --git ql/src/test/queries/clientpositive/correlationoptimizer6.q ql/src/test/queries/clientpositive/correlationoptimizer6.q index 88d790c..89c0609 100644 --- ql/src/test/queries/clientpositive/correlationoptimizer6.q +++ ql/src/test/queries/clientpositive/correlationoptimizer6.q @@ -36,7 +36,6 @@ ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key, yy.cnt; set hive.optimize.correlation=true; set hive.auto.convert.join=true; -set hive.optimize.mapjoin.mapreduce=true; -- Enable hive.auto.convert.join. EXPLAIN SELECT xx.key, xx.cnt, yy.key, yy.cnt @@ -306,7 +305,6 @@ ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key, yy.value, yy.cnt; set hive.optimize.correlation=true; set hive.auto.convert.join=true; -set hive.optimize.mapjoin.mapreduce=true; EXPLAIN SELECT xx.key, xx.cnt, yy.key, yy.value, yy.cnt FROM diff --git ql/src/test/queries/clientpositive/correlationoptimizer7.q ql/src/test/queries/clientpositive/correlationoptimizer7.q index 9b18972..3ba83f4 100644 --- ql/src/test/queries/clientpositive/correlationoptimizer7.q +++ ql/src/test/queries/clientpositive/correlationoptimizer7.q @@ -36,19 +36,14 @@ ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key, yy.value; set hive.auto.convert.join=true; set hive.auto.convert.join.noconditionaltask=true; set hive.auto.convert.join.noconditionaltask.size=10000000000; -set hive.optimize.mapjoin.mapreduce=true; set hive.optimize.correlation=false; -- Without correlation optimizer, we will have 3 MR jobs. -- The first one is a MapJoin and Aggregation (in the Reduce Phase). -- The second one is another MapJoin. The third one is for ordering. --- With the correlation optimizer, right now, we still have --- 3 MR jobs. The first one is a MapJoin and the map-side aggregation (a map-only job). --- The second one have the reduce-side aggregation and the second join. --- The third one is for ordering. --- Although we have turned on hive.optimize.mapjoin.mapreduce, that optimizer --- can not handle the case that the MR job (the one which a map-only job will be merged in) --- has multiple inputs. We should improve that optimizer. +-- With the correlation optimizer, right now, we have +-- 2 MR jobs. The first one will evaluate the sub-query xx and the join of +-- xx and yy. 
The second one will do the ORDER BY. EXPLAIN SELECT xx.key, xx.cnt, yy.key, yy.value FROM (SELECT x.key AS key, count(1) AS cnt diff --git ql/src/test/queries/clientpositive/multiMapJoin1.q ql/src/test/queries/clientpositive/multiMapJoin1.q index 86b0586..4f7f587 100644 --- ql/src/test/queries/clientpositive/multiMapJoin1.q +++ ql/src/test/queries/clientpositive/multiMapJoin1.q @@ -68,11 +68,9 @@ select count(*) FROM bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 on (bigTbl.key = smallTbl1.key) ) firstjoin -JOIN +JOIN smallTbl2 on (firstjoin.value1 = smallTbl2.value); -set hive.optimize.mapjoin.mapreduce=true; - -- Now run a query with two-way join, which should first be converted into a -- map-join followed by groupby and then finally into a single MR job. @@ -92,10 +90,9 @@ select count(*) FROM bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 on (bigTbl.key = smallTbl1.key) ) firstjoin -JOIN +JOIN smallTbl2 on (firstjoin.value1 = smallTbl2.value) group by smallTbl2.key; -set hive.optimize.mapjoin.mapreduce=false; create table smallTbl3(key string, value string); insert overwrite table smallTbl3 select * from src where key < 10; @@ -161,8 +158,8 @@ select count(*) FROM set hive.auto.convert.join.noconditionaltask=true; set hive.auto.convert.join.noconditionaltask.size=10000; --- join with 4 tables on different keys is also executed as a single MR job, --- So, overall two jobs - one for multi-way join and one for count(*) +-- Overall we will have a single MR job. The join with 4 tables will be done on +-- the Map side. explain select count(*) FROM ( @@ -173,7 +170,7 @@ select count(*) FROM FROM bigTbl JOIN smallTbl1 on (bigTbl.key1 = smallTbl1.key) ) firstjoin - JOIN + JOIN smallTbl2 on (firstjoin.value1 = smallTbl2.value) ) secondjoin JOIN smallTbl3 on (secondjoin.key2 = smallTbl3.key); @@ -187,41 +184,7 @@ select count(*) FROM FROM bigTbl JOIN smallTbl1 on (bigTbl.key1 = smallTbl1.key) ) firstjoin - JOIN + JOIN smallTbl2 on (firstjoin.value1 = smallTbl2.value) ) secondjoin JOIN smallTbl3 on (secondjoin.key2 = smallTbl3.key); - -set hive.optimize.mapjoin.mapreduce=true; --- Now run the above query with M-MR optimization --- This should be a single MR job end-to-end.
-explain -select count(*) FROM - ( - SELECT firstjoin.key1 as key1, firstjoin.key2 as key2, smallTbl2.key as key3, - firstjoin.value1 as value1, firstjoin.value2 as value2 FROM - (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2, - bigTbl.value as value1, bigTbl.value as value2 - FROM bigTbl JOIN smallTbl1 - on (bigTbl.key1 = smallTbl1.key) - ) firstjoin - JOIN - smallTbl2 on (firstjoin.value1 = smallTbl2.value) - ) secondjoin - JOIN smallTbl3 on (secondjoin.key2 = smallTbl3.key); - -select count(*) FROM - ( - SELECT firstjoin.key1 as key1, firstjoin.key2 as key2, smallTbl2.key as key3, - firstjoin.value1 as value1, firstjoin.value2 as value2 FROM - (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2, - bigTbl.value as value1, bigTbl.value as value2 - FROM bigTbl JOIN smallTbl1 - on (bigTbl.key1 = smallTbl1.key) - ) firstjoin - JOIN - smallTbl2 on (firstjoin.value1 = smallTbl2.value) - ) secondjoin - JOIN smallTbl3 on (secondjoin.key2 = smallTbl3.key); - -set hive.optimize.mapjoin.mapreduce=false; diff --git ql/src/test/queries/clientpositive/multiMapJoin2.q ql/src/test/queries/clientpositive/multiMapJoin2.q new file mode 100644 index 0000000..ce6cf6d --- /dev/null +++ ql/src/test/queries/clientpositive/multiMapJoin2.q @@ -0,0 +1,189 @@ +set hive.auto.convert.join=true; +set hive.auto.convert.join.noconditionaltask=true; +set hive.auto.convert.join.noconditionaltask.size=6000; + +-- We will generate one MR job. +EXPLAIN +SELECT tmp.key +FROM (SELECT x1.key AS key FROM src x1 JOIN src1 y1 ON (x1.key = y1.key) + UNION ALL + SELECT x2.key AS key FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)) tmp +ORDER BY tmp.key; + +SELECT tmp.key +FROM (SELECT x1.key AS key FROM src x1 JOIN src1 y1 ON (x1.key = y1.key) + UNION ALL + SELECT x2.key AS key FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)) tmp +ORDER BY tmp.key; + +set hive.auto.convert.join.noconditionaltask.size=400; +-- Check if the total size of local tables will be +-- larger than the limit that +-- we set through hive.auto.convert.join.noconditionaltask.size (right now, it is +-- 400 bytes). If so, do not merge. +-- For this query, we will merge the MapJoin of x2 and y2 into the MR job +-- for UNION ALL and ORDER BY. But, the MapJoin of x1 and y1 will not be merged +-- into that MR job. +EXPLAIN +SELECT tmp.key +FROM (SELECT x1.key AS key FROM src x1 JOIN src1 y1 ON (x1.key = y1.key) + UNION ALL + SELECT x2.key AS key FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)) tmp +ORDER BY tmp.key; + +SELECT tmp.key +FROM (SELECT x1.key AS key FROM src x1 JOIN src1 y1 ON (x1.key = y1.key) + UNION ALL + SELECT x2.key AS key FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)) tmp +ORDER BY tmp.key; + +set hive.auto.convert.join.noconditionaltask.size=6000; +-- We will use two jobs. +-- We will generate one MR job for GROUP BY +-- on x1, one MR job for the MapJoin of x2 and y2, the UNION ALL, and the +-- ORDER BY. +EXPLAIN +SELECT tmp.key +FROM (SELECT x1.key AS key FROM src1 x1 GROUP BY x1.key + UNION ALL + SELECT x2.key AS key FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)) tmp +ORDER BY tmp.key; + +SELECT tmp.key +FROM (SELECT x1.key AS key FROM src1 x1 GROUP BY x1.key + UNION ALL + SELECT x2.key AS key FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)) tmp +ORDER BY tmp.key; + +set hive.optimize.correlation=false; +-- When Correlation Optimizer is disabled, +-- we will use five jobs.
+-- We will generate one MR job to evaluate the sub-query tmp1, +-- one MR job to evaluate the sub-query tmp2, +-- one MR job for the Join of tmp1 and tmp2, +-- one MR job for aggregation on the result of the Join of tmp1 and tmp2, +-- and one MR job for the ORDER BY. +EXPLAIN +SELECT tmp1.key as key, count(*) as cnt +FROM (SELECT x1.key AS key + FROM src x1 JOIN src1 y1 ON (x1.key = y1.key) + GROUP BY x1.key) tmp1 +JOIN (SELECT x2.key AS key + FROM src x2 JOIN src1 y2 ON (x2.key = y2.key) + GROUP BY x2.key) tmp2 +ON (tmp1.key = tmp2.key) +GROUP BY tmp1.key +ORDER BY key, cnt; + +SELECT tmp1.key as key, count(*) as cnt +FROM (SELECT x1.key AS key + FROM src x1 JOIN src1 y1 ON (x1.key = y1.key) + GROUP BY x1.key) tmp1 +JOIN (SELECT x2.key AS key + FROM src x2 JOIN src1 y2 ON (x2.key = y2.key) + GROUP BY x2.key) tmp2 +ON (tmp1.key = tmp2.key) +GROUP BY tmp1.key +ORDER BY key, cnt; + +set hive.optimize.correlation=true; +-- When Correlation Optimizer is enabled, +-- we will use two jobs. The first MR job will evaluate sub-queries of tmp1, tmp2, +-- the Join of tmp1 and tmp2, and the aggregation on the result of the Join of +-- tmp1 and tmp2. The second job will do the ORDER BY. +EXPLAIN +SELECT tmp1.key as key, count(*) as cnt +FROM (SELECT x1.key AS key + FROM src x1 JOIN src1 y1 ON (x1.key = y1.key) + GROUP BY x1.key) tmp1 +JOIN (SELECT x2.key AS key + FROM src x2 JOIN src1 y2 ON (x2.key = y2.key) + GROUP BY x2.key) tmp2 +ON (tmp1.key = tmp2.key) +GROUP BY tmp1.key +ORDER BY key, cnt; + +SELECT tmp1.key as key, count(*) as cnt +FROM (SELECT x1.key AS key + FROM src x1 JOIN src1 y1 ON (x1.key = y1.key) + GROUP BY x1.key) tmp1 +JOIN (SELECT x2.key AS key + FROM src x2 JOIN src1 y2 ON (x2.key = y2.key) + GROUP BY x2.key) tmp2 +ON (tmp1.key = tmp2.key) +GROUP BY tmp1.key +ORDER BY key, cnt; + +set hive.optimize.correlation=false; +-- When Correlation Optimizer is disabled, +-- we will use five jobs. +-- We will generate one MR job to evaluate the sub-query tmp1, +-- one MR job to evaluate the sub-query tmp2, +-- one MR job for the Join of tmp1 and tmp2, +-- one MR job for aggregation on the result of the Join of tmp1 and tmp2, +-- and one MR job for the ORDER BY. +EXPLAIN +SELECT tmp1.key as key, count(*) as cnt +FROM (SELECT x1.key AS key + FROM src1 x1 + GROUP BY x1.key) tmp1 +JOIN (SELECT x2.key AS key + FROM src x2 JOIN src1 y2 ON (x2.key = y2.key) + GROUP BY x2.key) tmp2 +ON (tmp1.key = tmp2.key) +GROUP BY tmp1.key +ORDER BY key, cnt; + +SELECT tmp1.key as key, count(*) as cnt +FROM (SELECT x1.key AS key + FROM src1 x1 + GROUP BY x1.key) tmp1 +JOIN (SELECT x2.key AS key + FROM src x2 JOIN src1 y2 ON (x2.key = y2.key) + GROUP BY x2.key) tmp2 +ON (tmp1.key = tmp2.key) +GROUP BY tmp1.key +ORDER BY key, cnt; + +set hive.optimize.correlation=true; +-- When Correlation Optimizer is enabled, +-- we will use two jobs. The first MR job will evaluate sub-queries of tmp1, tmp2, +-- the Join of tmp1 and tmp2, and the aggregation on the result of the Join of +-- tmp1 and tmp2. The second job will do the ORDER BY.
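+-- Editor's note (annotation, not part of the new test file): the job counts asserted
+-- in the comments above can be checked against the STAGE DEPENDENCIES section of the
+-- EXPLAIN output; with hive.optimize.correlation=true, the sub-queries, the join, and
+-- the outer aggregation share a single shuffle through the DemuxOperator described in
+-- QueryPlanTreeTransformation earlier in this patch.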
+EXPLAIN +SELECT tmp1.key as key, count(*) as cnt +FROM (SELECT x1.key AS key + FROM src1 x1 + GROUP BY x1.key) tmp1 +JOIN (SELECT x2.key AS key + FROM src x2 JOIN src1 y2 ON (x2.key = y2.key) + GROUP BY x2.key) tmp2 +ON (tmp1.key = tmp2.key) +GROUP BY tmp1.key +ORDER BY key, cnt; + +SELECT tmp1.key as key, count(*) as cnt +FROM (SELECT x1.key AS key + FROM src1 x1 + GROUP BY x1.key) tmp1 +JOIN (SELECT x2.key AS key + FROM src x2 JOIN src1 y2 ON (x2.key = y2.key) + GROUP BY x2.key) tmp2 +ON (tmp1.key = tmp2.key) +GROUP BY tmp1.key +ORDER BY key, cnt; + +-- Check if we can correctly handle a partitioned table. +CREATE TABLE part_table(key string, value string) PARTITIONED BY (partitionId int); +INSERT OVERWRITE TABLE part_table PARTITION (partitionId=1) + SELECT key, value FROM src ORDER BY key, value LIMIT 100; +INSERT OVERWRITE TABLE part_table PARTITION (partitionId=2) + SELECT key, value FROM src1 ORDER BY key, value; + +EXPLAIN +SELECT count(*) +FROM part_table x JOIN src1 y ON (x.key = y.key); + +SELECT count(*) +FROM part_table x JOIN src1 y ON (x.key = y.key); + diff --git ql/src/test/queries/clientpositive/union34.q ql/src/test/queries/clientpositive/union34.q index a88e395..36bc865 100644 --- ql/src/test/queries/clientpositive/union34.q +++ ql/src/test/queries/clientpositive/union34.q @@ -1,11 +1,3 @@ --- HIVE-4342 --- Maponly union(UNION-13) is merged into non-maponly union(UNION-15) --- In this case, task for UNION-13 should be removed from top-task and merged into task for UNION-15 --- TS[2]-SEL[3]-RS[5]-JOIN[6]-SEL[7]-UNION[15]-SEL[16]-RS[17]-EX[18]-FS[19] --- TS[0]-SEL[1]-RS[4]-JOIN[6] --- TS[8]-SEL[9]-UNION[13]-SEL[14]-UNION[15] --- TS[11]-SEL[12]-UNION[13] - create table src10_1 (key string, value string); create table src10_2 (key string, value string); create table src10_3 (key string, value string); create table src10_4 (key string, value string); @@ -18,7 +10,8 @@ insert overwrite table src10_3 select * insert overwrite table src10_4 select *; set hive.auto.convert.join=true; - +-- When we convert the Join of sub1 and sub0 into a MapJoin, +-- we can use a single MR job to evaluate this entire query. explain SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION ALL SELECT sub2.key,sub2.value FROM (SELECT * FROM src10_3) sub2 JOIN (SELECT * FROM src10_4) sub3 ON (sub3.key = sub2.key) @@ -33,7 +26,10 @@ SELECT * FROM ( ) alias1 order by key; set hive.auto.convert.join=false; - +-- When we do not convert the Join of sub1 and sub0 into a MapJoin, +-- we need to use two MR jobs to evaluate this query. +-- The first job is for the Join of sub1 and sub0. The second job +-- is for the UNION ALL and ORDER BY.
explain SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) diff --git ql/src/test/results/clientpositive/auto_join0.q.out ql/src/test/results/clientpositive/auto_join0.q.out index c48181d..a75c01c 100644 --- ql/src/test/results/clientpositive/auto_join0.q.out +++ ql/src/test/results/clientpositive/auto_join0.q.out @@ -25,8 +25,7 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-6 is a root stage - Stage-5 depends on stages: Stage-6 - Stage-2 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-6 Stage-3 depends on stages: Stage-2 Stage-0 is a root stage @@ -62,7 +61,7 @@ STAGE PLANS: 1 [] Position of Big Table: 0 - Stage: Stage-5 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: a:src1:src @@ -102,40 +101,29 @@ STAGE PLANS: expr: _col3 type: string outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + sort order: ++++ + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - sort order: ++++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string Reduce Operator Tree: Extract Select Operator diff --git ql/src/test/results/clientpositive/auto_join10.q.out ql/src/test/results/clientpositive/auto_join10.q.out index deb8eb5..8afa3df 100644 --- ql/src/test/results/clientpositive/auto_join10.q.out +++ ql/src/test/results/clientpositive/auto_join10.q.out @@ -19,8 +19,7 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-2 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-5 Stage-0 is a root stage STAGE PLANS: @@ -51,7 +50,7 @@ STAGE PLANS: 1 [Column[_col0]] Position of Big Table: 0 - Stage: Stage-4 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: x:src @@ -87,25 +86,14 @@ STAGE PLANS: bucketGroup: false mode: hash outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint Reduce Operator Tree: Group By Operator aggregations: diff --git ql/src/test/results/clientpositive/auto_join11.q.out ql/src/test/results/clientpositive/auto_join11.q.out index 82bc3f9..bd0df67 100644 --- ql/src/test/results/clientpositive/auto_join11.q.out +++ ql/src/test/results/clientpositive/auto_join11.q.out @@ -19,8 +19,7 
@@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-2 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-5 Stage-0 is a root stage STAGE PLANS: @@ -53,7 +52,7 @@ STAGE PLANS: 1 [Column[_col0]] Position of Big Table: 1 - Stage: Stage-4 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: src2:src @@ -95,25 +94,14 @@ STAGE PLANS: bucketGroup: false mode: hash outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint Reduce Operator Tree: Group By Operator aggregations: diff --git ql/src/test/results/clientpositive/auto_join12.q.out ql/src/test/results/clientpositive/auto_join12.q.out index 1a170cb..94440c1 100644 --- ql/src/test/results/clientpositive/auto_join12.q.out +++ ql/src/test/results/clientpositive/auto_join12.q.out @@ -25,8 +25,7 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-6 is a root stage - Stage-5 depends on stages: Stage-6 - Stage-2 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-6 Stage-0 is a root stage STAGE PLANS: @@ -87,7 +86,7 @@ STAGE PLANS: 2 [Column[_col0]] Position of Big Table: 1 - Stage: Stage-5 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: src2:src @@ -132,25 +131,14 @@ STAGE PLANS: bucketGroup: false mode: hash outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint Reduce Operator Tree: Group By Operator aggregations: diff --git ql/src/test/results/clientpositive/auto_join13.q.out ql/src/test/results/clientpositive/auto_join13.q.out index 948ca70..7f86f8e 100644 --- ql/src/test/results/clientpositive/auto_join13.q.out +++ ql/src/test/results/clientpositive/auto_join13.q.out @@ -25,8 +25,7 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-8 is a root stage - Stage-7 depends on stages: Stage-8 - Stage-3 depends on stages: Stage-7 + Stage-3 depends on stages: Stage-8 Stage-0 is a root stage STAGE PLANS: @@ -83,7 +82,7 @@ STAGE PLANS: 1 [class org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge(Column[_col0]()] Position of Big Table: 0 - Stage: Stage-7 + Stage: Stage-3 Map Reduce Alias -> Map Operator Tree: src2:src @@ -137,25 +136,14 @@ STAGE PLANS: bucketGroup: false mode: hash outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work - - Stage: 
Stage-3 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint Reduce Operator Tree: Group By Operator aggregations: diff --git ql/src/test/results/clientpositive/auto_join15.q.out ql/src/test/results/clientpositive/auto_join15.q.out index aa40cff..6fb0ea6 100644 --- ql/src/test/results/clientpositive/auto_join15.q.out +++ ql/src/test/results/clientpositive/auto_join15.q.out @@ -19,8 +19,7 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-6 is a root stage - Stage-5 depends on stages: Stage-6 - Stage-2 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-6 Stage-3 depends on stages: Stage-2 Stage-0 is a root stage @@ -45,7 +44,7 @@ STAGE PLANS: 1 [Column[key]] Position of Big Table: 1 - Stage: Stage-5 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: a:src2 @@ -74,40 +73,29 @@ STAGE PLANS: expr: _col5 type: string outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + sort order: ++++ + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - sort order: ++++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string Reduce Operator Tree: Extract Select Operator diff --git ql/src/test/results/clientpositive/auto_join16.q.out ql/src/test/results/clientpositive/auto_join16.q.out index 06d73d8..9c297e8 100644 --- ql/src/test/results/clientpositive/auto_join16.q.out +++ ql/src/test/results/clientpositive/auto_join16.q.out @@ -19,8 +19,7 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-2 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-5 Stage-0 is a root stage STAGE PLANS: @@ -48,7 +47,7 @@ STAGE PLANS: 1 [Column[key], Column[value]] Position of Big Table: 0 - Stage: Stage-4 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: subq:a @@ -90,25 +89,14 @@ STAGE PLANS: bucketGroup: false mode: hash outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint Reduce Operator Tree: Group By Operator aggregations: diff --git ql/src/test/results/clientpositive/auto_join2.q.out ql/src/test/results/clientpositive/auto_join2.q.out index a11f347..1651f0d 100644 --- 
ql/src/test/results/clientpositive/auto_join2.q.out +++ ql/src/test/results/clientpositive/auto_join2.q.out @@ -16,8 +16,8 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-8 is a root stage - Stage-7 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-7 + Stage-6 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-6 Stage-3 depends on stages: Stage-0 STAGE PLANS: @@ -56,7 +56,7 @@ STAGE PLANS: 1 [class org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge(Column[key]()] Position of Big Table: 0 - Stage: Stage-7 + Stage: Stage-6 Map Reduce Alias -> Map Operator Tree: src2 diff --git ql/src/test/results/clientpositive/auto_join20.q.out ql/src/test/results/clientpositive/auto_join20.q.out index cae120a..6dd8ff7 100644 --- ql/src/test/results/clientpositive/auto_join20.q.out +++ ql/src/test/results/clientpositive/auto_join20.q.out @@ -19,8 +19,7 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-7 is a root stage - Stage-6 depends on stages: Stage-7 - Stage-2 depends on stages: Stage-6 + Stage-2 depends on stages: Stage-7 Stage-3 depends on stages: Stage-2 Stage-0 is a root stage @@ -80,7 +79,7 @@ STAGE PLANS: 2 [Column[key]] Position of Big Table: 2 - Stage: Stage-6 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: a:src3 @@ -120,48 +119,37 @@ STAGE PLANS: expr: _col9 type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + expr: _col5 + type: string + sort order: ++++++ + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + expr: _col5 + type: string Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - sort order: ++++++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string Reduce Operator Tree: Extract Select Operator @@ -266,8 +254,7 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-7 is a root stage - Stage-6 depends on stages: Stage-7 - Stage-2 depends on stages: Stage-6 + Stage-2 depends on stages: Stage-7 Stage-3 depends on stages: Stage-2 Stage-0 is a root stage @@ -327,7 +314,7 @@ STAGE PLANS: 2 [Column[key]] Position of Big Table: 2 - Stage: Stage-6 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: a:src3 @@ -367,48 +354,37 @@ STAGE PLANS: expr: _col9 type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + 
type: string + expr: _col4 + type: string + expr: _col5 + type: string + sort order: ++++++ + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + expr: _col5 + type: string Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - sort order: ++++++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string Reduce Operator Tree: Extract Select Operator diff --git ql/src/test/results/clientpositive/auto_join21.q.out ql/src/test/results/clientpositive/auto_join21.q.out index 423094d..e4ee758 100644 --- ql/src/test/results/clientpositive/auto_join21.q.out +++ ql/src/test/results/clientpositive/auto_join21.q.out @@ -9,8 +9,7 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-6 is a root stage - Stage-5 depends on stages: Stage-6 - Stage-2 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-6 Stage-0 is a root stage STAGE PLANS: @@ -65,7 +64,7 @@ STAGE PLANS: 2 [Column[key]] Position of Big Table: 2 - Stage: Stage-5 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: src3 @@ -105,48 +104,37 @@ STAGE PLANS: expr: _col9 type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + expr: _col5 + type: string + sort order: ++++++ + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + expr: _col5 + type: string Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - sort order: ++++++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string Reduce Operator Tree: Extract File Output Operator diff --git ql/src/test/results/clientpositive/auto_join22.q.out ql/src/test/results/clientpositive/auto_join22.q.out index 6f418db..a176270 100644 --- ql/src/test/results/clientpositive/auto_join22.q.out +++ ql/src/test/results/clientpositive/auto_join22.q.out @@ -9,8 +9,7 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-8 is a root stage - Stage-7 depends on stages: Stage-8 - Stage-3 depends on stages: Stage-7 + Stage-3 depends on stages: Stage-8 Stage-0 is a root stage STAGE PLANS: @@ -49,7 +48,7 @@ STAGE PLANS: 1 [Column[_col2]] Position of Big Table: 1 - Stage: Stage-7 + Stage: 
Stage-3 Map Reduce Alias -> Map Operator Tree: src5:src3:src2 @@ -97,25 +96,14 @@ STAGE PLANS: bucketGroup: false mode: hash outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work - - Stage: Stage-3 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint Reduce Operator Tree: Group By Operator aggregations: diff --git ql/src/test/results/clientpositive/auto_join23.q.out ql/src/test/results/clientpositive/auto_join23.q.out index 6a6bc6c..e4fde24 100644 --- ql/src/test/results/clientpositive/auto_join23.q.out +++ ql/src/test/results/clientpositive/auto_join23.q.out @@ -9,8 +9,7 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-2 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-5 Stage-0 is a root stage STAGE PLANS: @@ -38,7 +37,7 @@ STAGE PLANS: 1 [] Position of Big Table: 1 - Stage: Stage-4 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: src2 @@ -71,40 +70,29 @@ STAGE PLANS: expr: _col5 type: string outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + sort order: ++++ + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - sort order: ++++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string Reduce Operator Tree: Extract File Output Operator diff --git ql/src/test/results/clientpositive/auto_join24.q.out ql/src/test/results/clientpositive/auto_join24.q.out index c7e872e..ab37c4b 100644 --- ql/src/test/results/clientpositive/auto_join24.q.out +++ ql/src/test/results/clientpositive/auto_join24.q.out @@ -28,8 +28,7 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-2 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-5 Stage-0 is a root stage STAGE PLANS: @@ -53,7 +52,7 @@ STAGE PLANS: 1 [Column[key]] Position of Big Table: 1 - Stage: Stage-4 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: b @@ -82,25 +81,14 @@ STAGE PLANS: bucketGroup: false mode: hash outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + 
type: bigint Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint Reduce Operator Tree: Group By Operator aggregations: diff --git ql/src/test/results/clientpositive/auto_join26.q.out ql/src/test/results/clientpositive/auto_join26.q.out index 7268755..4699062 100644 --- ql/src/test/results/clientpositive/auto_join26.q.out +++ ql/src/test/results/clientpositive/auto_join26.q.out @@ -16,8 +16,7 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-6 is a root stage - Stage-5 depends on stages: Stage-6 - Stage-2 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-6 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 @@ -42,7 +41,7 @@ STAGE PLANS: 1 [Column[key]] Position of Big Table: 1 - Stage: Stage-5 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: y @@ -74,31 +73,20 @@ STAGE PLANS: type: string mode: hash outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: bigint Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Reduce Output Operator - key expressions: - expr: _col0 - type: string - sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint Reduce Operator Tree: Group By Operator aggregations: diff --git ql/src/test/results/clientpositive/auto_join28.q.out ql/src/test/results/clientpositive/auto_join28.q.out index 89db4aa..cb37cf4 100644 --- ql/src/test/results/clientpositive/auto_join28.q.out +++ ql/src/test/results/clientpositive/auto_join28.q.out @@ -9,8 +9,7 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-6 is a root stage - Stage-5 depends on stages: Stage-6 - Stage-2 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-6 Stage-0 is a root stage STAGE PLANS: @@ -65,7 +64,7 @@ STAGE PLANS: 2 [Column[key]] Position of Big Table: 2 - Stage: Stage-5 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: src3 @@ -105,48 +104,37 @@ STAGE PLANS: expr: _col9 type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + expr: _col5 + type: string + sort order: ++++++ + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + expr: _col5 + type: string Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: 
string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - sort order: ++++++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string Reduce Operator Tree: Extract File Output Operator @@ -172,8 +160,7 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-6 is a root stage - Stage-5 depends on stages: Stage-6 - Stage-2 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-6 Stage-0 is a root stage STAGE PLANS: @@ -232,7 +219,7 @@ STAGE PLANS: 2 [Column[key]] Position of Big Table: 0 - Stage: Stage-5 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: src1 @@ -272,48 +259,37 @@ STAGE PLANS: expr: _col9 type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + expr: _col5 + type: string + sort order: ++++++ + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + expr: _col5 + type: string Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - sort order: ++++++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string Reduce Operator Tree: Extract File Output Operator @@ -339,8 +315,7 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-6 is a root stage - Stage-5 depends on stages: Stage-6 - Stage-2 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-6 Stage-0 is a root stage STAGE PLANS: @@ -399,7 +374,7 @@ STAGE PLANS: 2 [Column[key]] Position of Big Table: 1 - Stage: Stage-5 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: src2 @@ -439,48 +414,37 @@ STAGE PLANS: expr: _col9 type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + expr: _col5 + type: string + sort order: ++++++ + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + expr: _col5 + type: string Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: 
string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - sort order: ++++++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string Reduce Operator Tree: Extract File Output Operator @@ -506,8 +470,7 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-6 is a root stage - Stage-5 depends on stages: Stage-6 - Stage-2 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-6 Stage-0 is a root stage STAGE PLANS: @@ -562,7 +525,7 @@ STAGE PLANS: 2 [Column[key]] Position of Big Table: 2 - Stage: Stage-5 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: src3 @@ -602,48 +565,37 @@ STAGE PLANS: expr: _col9 type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + expr: _col5 + type: string + sort order: ++++++ + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + expr: _col5 + type: string Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - sort order: ++++++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string Reduce Operator Tree: Extract File Output Operator diff --git ql/src/test/results/clientpositive/auto_join29.q.out ql/src/test/results/clientpositive/auto_join29.q.out index c3744f3..c88d472 100644 --- ql/src/test/results/clientpositive/auto_join29.q.out +++ ql/src/test/results/clientpositive/auto_join29.q.out @@ -9,8 +9,7 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-6 is a root stage - Stage-5 depends on stages: Stage-6 - Stage-2 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-6 Stage-0 is a root stage STAGE PLANS: @@ -65,7 +64,7 @@ STAGE PLANS: 2 [Column[key]] Position of Big Table: 2 - Stage: Stage-5 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: src3 @@ -105,48 +104,37 @@ STAGE PLANS: expr: _col9 type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + expr: _col5 + type: string + sort order: ++++++ + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + expr: 
_col4 + type: string + expr: _col5 + type: string Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - sort order: ++++++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string Reduce Operator Tree: Extract File Output Operator @@ -680,8 +668,7 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-6 is a root stage - Stage-5 depends on stages: Stage-6 - Stage-2 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-6 Stage-0 is a root stage STAGE PLANS: @@ -740,7 +727,7 @@ STAGE PLANS: 2 [Column[key]] Position of Big Table: 0 - Stage: Stage-5 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: src1 @@ -780,48 +767,37 @@ STAGE PLANS: expr: _col9 type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + expr: _col5 + type: string + sort order: ++++++ + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + expr: _col5 + type: string Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - sort order: ++++++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string Reduce Operator Tree: Extract File Output Operator @@ -1355,8 +1331,7 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-6 is a root stage - Stage-5 depends on stages: Stage-6 - Stage-2 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-6 Stage-0 is a root stage STAGE PLANS: @@ -1415,7 +1390,7 @@ STAGE PLANS: 2 [Column[key]] Position of Big Table: 1 - Stage: Stage-5 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: src2 @@ -1455,48 +1430,37 @@ STAGE PLANS: expr: _col9 type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + expr: _col5 + type: string + sort order: ++++++ + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + expr: 
_col3 + type: string + expr: _col4 + type: string + expr: _col5 + type: string Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - sort order: ++++++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string Reduce Operator Tree: Extract File Output Operator @@ -2042,8 +2006,7 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-6 is a root stage - Stage-5 depends on stages: Stage-6 - Stage-2 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-6 Stage-0 is a root stage STAGE PLANS: @@ -2098,7 +2061,7 @@ STAGE PLANS: 2 [Column[key]] Position of Big Table: 2 - Stage: Stage-5 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: src3 @@ -2138,48 +2101,37 @@ STAGE PLANS: expr: _col9 type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + expr: _col5 + type: string + sort order: ++++++ + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + expr: _col5 + type: string Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - sort order: ++++++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string Reduce Operator Tree: Extract File Output Operator @@ -2725,8 +2677,7 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-6 is a root stage - Stage-5 depends on stages: Stage-6 - Stage-2 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-6 Stage-0 is a root stage STAGE PLANS: @@ -2777,7 +2728,7 @@ STAGE PLANS: 2 [Column[key]] Position of Big Table: 1 - Stage: Stage-5 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: src2 @@ -2817,48 +2768,37 @@ STAGE PLANS: expr: _col9 type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + expr: _col5 + type: string + sort order: ++++++ + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + 
expr: _col2 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + expr: _col5 + type: string Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - sort order: ++++++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string Reduce Operator Tree: Extract File Output Operator @@ -2892,8 +2832,7 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-6 is a root stage - Stage-5 depends on stages: Stage-6 - Stage-2 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-6 Stage-0 is a root stage STAGE PLANS: @@ -2952,7 +2891,7 @@ STAGE PLANS: 2 [Column[key]] Position of Big Table: 2 - Stage: Stage-5 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: src3 @@ -2992,48 +2931,37 @@ STAGE PLANS: expr: _col9 type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + expr: _col5 + type: string + sort order: ++++++ + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + expr: _col5 + type: string Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - sort order: ++++++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string Reduce Operator Tree: Extract File Output Operator @@ -3567,8 +3495,7 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-6 is a root stage - Stage-5 depends on stages: Stage-6 - Stage-2 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-6 Stage-0 is a root stage STAGE PLANS: @@ -3627,7 +3554,7 @@ STAGE PLANS: 2 [Column[key]] Position of Big Table: 0 - Stage: Stage-5 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: src1 @@ -3667,48 +3594,37 @@ STAGE PLANS: expr: _col9 type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + expr: _col5 + type: string + sort order: ++++++ + tag: -1 + value expressions: + expr: _col0 + type: 
string + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + expr: _col5 + type: string Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - sort order: ++++++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string Reduce Operator Tree: Extract File Output Operator @@ -3742,8 +3658,7 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-6 is a root stage - Stage-5 depends on stages: Stage-6 - Stage-2 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-6 Stage-0 is a root stage STAGE PLANS: @@ -3802,7 +3717,7 @@ STAGE PLANS: 2 [Column[key]] Position of Big Table: 1 - Stage: Stage-5 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: src2 @@ -3846,48 +3761,37 @@ STAGE PLANS: expr: _col9 type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + expr: _col5 + type: string + sort order: ++++++ + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + expr: _col5 + type: string Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - sort order: ++++++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string Reduce Operator Tree: Extract File Output Operator @@ -3943,8 +3847,7 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-6 is a root stage - Stage-5 depends on stages: Stage-6 - Stage-2 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-6 Stage-0 is a root stage STAGE PLANS: @@ -3995,7 +3898,7 @@ STAGE PLANS: 2 [Column[key]] Position of Big Table: 1 - Stage: Stage-5 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: src2 @@ -4035,48 +3938,37 @@ STAGE PLANS: expr: _col9 type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + expr: _col5 + type: string + sort order: ++++++ + tag: -1 + value 
expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + expr: _col5 + type: string Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - sort order: ++++++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string Reduce Operator Tree: Extract File Output Operator diff --git ql/src/test/results/clientpositive/auto_join32.q.out ql/src/test/results/clientpositive/auto_join32.q.out index 312664a..7f4822b 100644 --- ql/src/test/results/clientpositive/auto_join32.q.out +++ ql/src/test/results/clientpositive/auto_join32.q.out @@ -25,8 +25,7 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-2 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-5 Stage-0 is a root stage STAGE PLANS: @@ -50,7 +49,7 @@ STAGE PLANS: 1 [Column[name]] Position of Big Table: 1 - Stage: Stage-4 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: v @@ -86,33 +85,22 @@ STAGE PLANS: type: string mode: hash outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col2 + type: bigint Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint Reduce Operator Tree: Group By Operator aggregations: diff --git ql/src/test/results/clientpositive/auto_join33.q.out ql/src/test/results/clientpositive/auto_join33.q.out deleted file mode 100644 index 8fc0e84..0000000 --- ql/src/test/results/clientpositive/auto_join33.q.out +++ /dev/null @@ -1,148 +0,0 @@ -PREHOOK: query: -- empty tables -create table studenttab10k (name string, age int, gpa double) -PREHOOK: type: CREATETABLE -POSTHOOK: query: -- empty tables -create table studenttab10k (name string, age int, gpa double) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: default@studenttab10k -PREHOOK: query: create table votertab10k (name string, age int, registration string, contributions float) -PREHOOK: type: CREATETABLE -POSTHOOK: query: create table votertab10k (name string, age int, registration string, contributions float) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: default@votertab10k -PREHOOK: query: explain select s.name, count(distinct registration) -from studenttab10k s join votertab10k v -on (s.name = v.name) -group by s.name -PREHOOK: type: QUERY -POSTHOOK: query: explain select s.name, count(distinct registration) -from studenttab10k s join 
votertab10k v -on (s.name = v.name) -group by s.name -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME studenttab10k) s) (TOK_TABREF (TOK_TABNAME votertab10k) v) (= (. (TOK_TABLE_OR_COL s) name) (. (TOK_TABLE_OR_COL v) name)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s) name)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL registration)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL s) name)))) - -STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-5 - Map Reduce Local Work - Alias -> Map Local Tables: - s - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - s - TableScan - alias: s - HashTable Sink Operator - condition expressions: - 0 {name} - 1 {registration} - handleSkewJoin: false - keys: - 0 [Column[name]] - 1 [Column[name]] - Position of Big Table: 1 - - Stage: Stage-4 - Map Reduce - Alias -> Map Operator Tree: - v - TableScan - alias: v - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {name} - 1 {registration} - handleSkewJoin: false - keys: - 0 [Column[name]] - 1 [Column[name]] - outputColumnNames: _col0, _col7 - Position of Big Table: 1 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col7 - type: string - outputColumnNames: _col0, _col7 - Group By Operator - aggregations: - expr: count(DISTINCT _col7) - bucketGroup: false - keys: - expr: _col0 - type: string - expr: _col7 - type: string - mode: hash - outputColumnNames: _col0, _col1, _col2 - Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - aggregations: - expr: count(DISTINCT KEY._col1:0._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - mode: mergepartial - outputColumnNames: _col0, _col1 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - Stage: Stage-0 - Fetch Operator - limit: -1 - - -PREHOOK: query: select s.name, count(distinct registration) -from studenttab10k s join votertab10k v -on (s.name = v.name) -group by s.name -PREHOOK: type: QUERY -PREHOOK: Input: default@studenttab10k -PREHOOK: Input: default@votertab10k -#### A masked pattern was here #### -POSTHOOK: query: select s.name, count(distinct registration) -from studenttab10k s join votertab10k v -on (s.name = v.name) -group by s.name -POSTHOOK: type: QUERY -POSTHOOK: Input: default@studenttab10k -POSTHOOK: Input: default@votertab10k -#### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/auto_sortmerge_join_10.q.out ql/src/test/results/clientpositive/auto_sortmerge_join_10.q.out index da375f6..0cd7734 100644 --- ql/src/test/results/clientpositive/auto_sortmerge_join_10.q.out +++ ql/src/test/results/clientpositive/auto_sortmerge_join_10.q.out @@ -290,8 +290,7 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-1 is a root stage Stage-6 depends on stages: Stage-1 - Stage-5 depends on stages: Stage-6 - Stage-3 
depends on stages: Stage-5 + Stage-3 depends on stages: Stage-6 Stage-0 is a root stage STAGE PLANS: @@ -382,7 +381,7 @@ STAGE PLANS: 1 [Column[_col0]] Position of Big Table: 0 - Stage: Stage-5 + Stage: Stage-3 Map Reduce Alias -> Map Operator Tree: $INTNAME @@ -404,25 +403,14 @@ STAGE PLANS: bucketGroup: false mode: hash outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work - - Stage: Stage-3 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint Reduce Operator Tree: Group By Operator aggregations: diff --git ql/src/test/results/clientpositive/auto_sortmerge_join_11.q.out ql/src/test/results/clientpositive/auto_sortmerge_join_11.q.out index 9769bd8..a44e4f8 100644 --- ql/src/test/results/clientpositive/auto_sortmerge_join_11.q.out +++ ql/src/test/results/clientpositive/auto_sortmerge_join_11.q.out @@ -84,8 +84,7 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-2 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-5 Stage-0 is a root stage STAGE PLANS: @@ -110,7 +109,7 @@ STAGE PLANS: 1 [Column[key]] Position of Big Table: 1 - Stage: Stage-4 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: b @@ -135,21 +134,12 @@ STAGE PLANS: bucketGroup: false mode: hash outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0 - columns.types bigint - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Path -> Alias: @@ -299,38 +289,6 @@ STAGE PLANS: Truncated Path -> Alias: /bucket_big/ds=2008-04-08 [b] /bucket_big/ds=2008-04-09 [b] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0 - columns.types bigint - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0 - columns.types bigint - escape.delim \ - Truncated Path -> Alias: -#### A masked pattern was here #### Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -402,8 +360,7 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-2 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-5 Stage-0 is a root stage STAGE PLANS: @@ -428,7 +385,7 @@ STAGE 
PLANS: 1 [Column[key]] Position of Big Table: 1 - Stage: Stage-4 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: b @@ -453,21 +410,12 @@ STAGE PLANS: bucketGroup: false mode: hash outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0 - columns.types bigint - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Path -> Alias: @@ -617,38 +565,6 @@ STAGE PLANS: Truncated Path -> Alias: /bucket_big/ds=2008-04-08 [b] /bucket_big/ds=2008-04-09 [b] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0 - columns.types bigint - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0 - columns.types bigint - escape.delim \ - Truncated Path -> Alias: -#### A masked pattern was here #### Needs Tagging: false Reduce Operator Tree: Group By Operator
diff --git ql/src/test/results/clientpositive/auto_sortmerge_join_12.q.out ql/src/test/results/clientpositive/auto_sortmerge_join_12.q.out index 5c4ba5b..51dc8e0 100644 --- ql/src/test/results/clientpositive/auto_sortmerge_join_12.q.out +++ ql/src/test/results/clientpositive/auto_sortmerge_join_12.q.out @@ -112,8 +112,7 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-8 is a root stage - Stage-7 depends on stages: Stage-8 - Stage-2 depends on stages: Stage-7 + Stage-2 depends on stages: Stage-8 Stage-0 is a root stage STAGE PLANS: @@ -174,7 +173,7 @@ STAGE PLANS: 1 [] Position of Big Table: 0 - Stage: Stage-7 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: c @@ -219,23 +218,14 @@ STAGE PLANS: bucketGroup: false mode: hash outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0 - columns.types bigint - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition:
diff --git ql/src/test/results/clientpositive/auto_sortmerge_join_9.q.out ql/src/test/results/clientpositive/auto_sortmerge_join_9.q.out index 6add99a..96fcd2b 100644 --- ql/src/test/results/clientpositive/auto_sortmerge_join_9.q.out +++ ql/src/test/results/clientpositive/auto_sortmerge_join_9.q.out @@ -1460,8 +1460,7 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-2 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-5 Stage-0 is a root stage STAGE PLANS: @@ -1490,7 +1489,7 @@ STAGE PLANS: 1 [Column[_col0]] Position of Big Table: 0 - Stage: Stage-4 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: subq1:a @@ -1519,25 +1518,14 @@ STAGE PLANS: bucketGroup: false mode: hash outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint Reduce Operator Tree: Group By Operator aggregations:
diff --git ql/src/test/results/clientpositive/correlationoptimizer1.q.out ql/src/test/results/clientpositive/correlationoptimizer1.q.out index db3bd78..2b8a149 100644 --- ql/src/test/results/clientpositive/correlationoptimizer1.q.out +++ ql/src/test/results/clientpositive/correlationoptimizer1.q.out @@ -349,199 +349,6 @@ POSTHOOK: Input: default@src1 PREHOOK: query: -- Enable hive.auto.convert.join. -- Correlation Optimizer will detect that the join will be converted to a Map-join, -- so it will not try to optimize this query. -EXPLAIN -SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) -FROM (SELECT x.key AS key, count(1) AS cnt - FROM src1 x JOIN src y ON (x.key = y.key) - GROUP BY x.key) tmp -PREHOOK: type: QUERY -POSTHOOK: query: -- Enable hive.auto.convert.join. --- Correlation Optimizer will detect that the join will be converted to a Map-join, --- so it will not try to optimize this query. -EXPLAIN -SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) -FROM (SELECT x.key AS key, count(1) AS cnt - FROM src1 x JOIN src y ON (x.key = y.key) - GROUP BY x.key) tmp -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) key)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (.
(TOK_TABLE_OR_COL tmp) cnt))))))) - -STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-5 depends on stages: Stage-6 - Stage-2 depends on stages: Stage-5 - Stage-3 depends on stages: Stage-2 - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-6 - Map Reduce Local Work - Alias -> Map Local Tables: - tmp:x - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - tmp:x - TableScan - alias: x - HashTable Sink Operator - condition expressions: - 0 {key} - 1 - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - Position of Big Table: 1 - - Stage: Stage-5 - Map Reduce - Alias -> Map Operator Tree: - tmp:y - TableScan - alias: y - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - outputColumnNames: _col0 - Position of Big Table: 1 - Select Operator - expressions: - expr: _col0 - type: string - outputColumnNames: _col0 - Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string - mode: hash - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Local Work: - Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Reduce Output Operator - key expressions: - expr: _col0 - type: string - sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint - Reduce Operator Tree: - Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - mode: mergepartial - outputColumnNames: _col0, _col1 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - outputColumnNames: _col0, _col1 - Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - bucketGroup: false - mode: hash - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - - Stage: Stage-3 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - Reduce Operator Tree: - Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - bucketGroup: false - mode: mergepartial - outputColumnNames: _col0, _col1 - Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - Stage: Stage-0 - Fetch Operator - limit: -1 - - -PREHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) -FROM (SELECT x.key AS key, count(1) AS cnt - FROM src1 x JOIN src y ON (x.key = y.key) - GROUP BY x.key) tmp -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) -FROM (SELECT x.key AS 
key, count(1) AS cnt - FROM src1 x JOIN src y ON (x.key = y.key) - GROUP BY x.key) tmp -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -652447 37 -PREHOOK: query: -- Enable hive.auto.convert.join. --- Correlation Optimizer will detect that the join will be converted to a Map-join, --- so it will not try to optimize this query. -- We should generate 1 MR job for subquery tmp. EXPLAIN SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) @@ -564,8 +371,8 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-6 is a root stage - Stage-5 depends on stages: Stage-6 - Stage-3 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-6 + Stage-3 depends on stages: Stage-2 Stage-0 is a root stage STAGE PLANS: @@ -589,7 +396,7 @@ STAGE PLANS: 1 [Column[key]] Position of Big Table: 1 - Stage: Stage-5 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: tmp:y diff --git ql/src/test/results/clientpositive/correlationoptimizer3.q.out ql/src/test/results/clientpositive/correlationoptimizer3.q.out index cfa7eff..a6691ae 100644 --- ql/src/test/results/clientpositive/correlationoptimizer3.q.out +++ ql/src/test/results/clientpositive/correlationoptimizer3.q.out @@ -598,11 +598,8 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-9 is a root stage - Stage-7 depends on stages: Stage-9 - Stage-2 depends on stages: Stage-7, Stage-8 + Stage-2 depends on stages: Stage-9 Stage-3 depends on stages: Stage-2 - Stage-10 is a root stage - Stage-8 depends on stages: Stage-10 Stage-0 is a root stage STAGE PLANS: @@ -612,6 +609,9 @@ STAGE PLANS: tmp:b:x Fetch Operator limit: -1 + tmp:d:x + Fetch Operator + limit: -1 Alias -> Map Local Operator Tree: tmp:b:x TableScan @@ -625,8 +625,20 @@ STAGE PLANS: 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 + tmp:d:x + TableScan + alias: x + HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + Position of Big Table: 1 - Stage: Stage-7 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: tmp:b:y @@ -658,44 +670,54 @@ STAGE PLANS: type: string mode: hash outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 0 + value expressions: + expr: _col1 + type: bigint + tmp:d:y + TableScan + alias: y + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col1 + Position of Big Table: 1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 1 + value expressions: + expr: _col1 + type: string Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: - $INTNAME - Reduce Output Operator - key expressions: - expr: _col0 - type: string - sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col1 - type: bigint - $INTNAME1 - Reduce 
Output Operator - key expressions: - expr: _col0 - type: string - sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col1 - type: string Reduce Operator Tree: Demux Operator Group By Operator @@ -818,60 +840,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - Stage: Stage-10 - Map Reduce Local Work - Alias -> Map Local Tables: - tmp:d:x - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - tmp:d:x - TableScan - alias: x - HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - Position of Big Table: 1 - - Stage: Stage-8 - Map Reduce - Alias -> Map Operator Tree: - tmp:d:y - TableScan - alias: y - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - outputColumnNames: _col0, _col1 - Position of Big Table: 1 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Local Work: - Map Reduce Local Work - Stage: Stage-0 Fetch Operator limit: -1 @@ -1482,11 +1450,8 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-9 is a root stage - Stage-7 depends on stages: Stage-9 - Stage-2 depends on stages: Stage-7, Stage-8 + Stage-2 depends on stages: Stage-9 Stage-3 depends on stages: Stage-2 - Stage-10 is a root stage - Stage-8 depends on stages: Stage-10 Stage-0 is a root stage STAGE PLANS: @@ -1496,6 +1461,9 @@ STAGE PLANS: tmp:b:x Fetch Operator limit: -1 + tmp:d:x + Fetch Operator + limit: -1 Alias -> Map Local Operator Tree: tmp:b:x TableScan @@ -1509,8 +1477,20 @@ STAGE PLANS: 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 + tmp:d:x + TableScan + alias: x + HashTable Sink Operator + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + Position of Big Table: 1 - Stage: Stage-7 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: tmp:b:y @@ -1535,44 +1515,61 @@ STAGE PLANS: expr: _col1 type: string outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 0 + value expressions: + expr: _col1 + type: string + tmp:d:y + TableScan + alias: y + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0 + Position of Big Table: 1 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Group By Operator + aggregations: + expr: count(1) + bucketGroup: false + keys: + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 1 + 
value expressions: + expr: _col1 + type: bigint Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: - $INTNAME - Reduce Output Operator - key expressions: - expr: _col0 - type: string - sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col1 - type: string - $INTNAME1 - Reduce Output Operator - key expressions: - expr: _col0 - type: string - sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col1 - type: bigint Reduce Operator Tree: Demux Operator Mux Operator @@ -1695,67 +1692,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - Stage: Stage-10 - Map Reduce Local Work - Alias -> Map Local Tables: - tmp:d:x - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - tmp:d:x - TableScan - alias: x - HashTable Sink Operator - condition expressions: - 0 {key} - 1 - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - Position of Big Table: 1 - - Stage: Stage-8 - Map Reduce - Alias -> Map Operator Tree: - tmp:d:y - TableScan - alias: y - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - outputColumnNames: _col0 - Position of Big Table: 1 - Select Operator - expressions: - expr: _col0 - type: string - outputColumnNames: _col0 - Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string - mode: hash - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Local Work: - Map Reduce Local Work - Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/correlationoptimizer4.q.out ql/src/test/results/clientpositive/correlationoptimizer4.q.out index 285a54f..3605619 100644 --- ql/src/test/results/clientpositive/correlationoptimizer4.q.out +++ ql/src/test/results/clientpositive/correlationoptimizer4.q.out @@ -428,8 +428,8 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-7 is a root stage - Stage-6 depends on stages: Stage-7 - Stage-3 depends on stages: Stage-6 + Stage-2 depends on stages: Stage-7 + Stage-3 depends on stages: Stage-2 Stage-0 is a root stage STAGE PLANS: @@ -472,7 +472,7 @@ STAGE PLANS: 2 [Column[key]] Position of Big Table: 0 - Stage: Stage-6 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: tmp:x diff --git ql/src/test/results/clientpositive/correlationoptimizer6.q.out ql/src/test/results/clientpositive/correlationoptimizer6.q.out index b0438e6..d8e1f29 100644 --- ql/src/test/results/clientpositive/correlationoptimizer6.q.out +++ ql/src/test/results/clientpositive/correlationoptimizer6.q.out @@ -661,21 +661,33 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-9 is a root stage - Stage-7 depends on stages: Stage-9 - Stage-2 depends on stages: Stage-7, Stage-8 + Stage-2 depends on stages: Stage-9 Stage-3 depends on stages: Stage-2 - Stage-10 is a root stage - Stage-8 depends on stages: Stage-10 Stage-0 is a root stage STAGE PLANS: Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: + xx:y + Fetch Operator + limit: -1 yy:y Fetch Operator limit: -1 Alias -> Map Local Operator Tree: + xx:y + TableScan + alias: y 
+ HashTable Sink Operator + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + Position of Big Table: 0 yy:y TableScan alias: y @@ -689,9 +701,50 @@ STAGE PLANS: 1 [Column[key]] Position of Big Table: 0 - Stage: Stage-7 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: + xx:x + TableScan + alias: x + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Group By Operator + aggregations: + expr: count(1) + bucketGroup: false + keys: + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 0 + value expressions: + expr: _col1 + type: bigint yy:x TableScan alias: x @@ -721,44 +774,20 @@ STAGE PLANS: type: string mode: hash outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 1 + value expressions: + expr: _col1 + type: bigint Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: - $INTNAME - Reduce Output Operator - key expressions: - expr: _col0 - type: string - sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col1 - type: bigint - $INTNAME1 - Reduce Output Operator - key expressions: - expr: _col0 - type: string - sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col1 - type: bigint Reduce Operator Tree: Demux Operator Group By Operator @@ -880,67 +909,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - Stage: Stage-10 - Map Reduce Local Work - Alias -> Map Local Tables: - xx:y - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - xx:y - TableScan - alias: y - HashTable Sink Operator - condition expressions: - 0 {key} - 1 - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - Position of Big Table: 0 - - Stage: Stage-8 - Map Reduce - Alias -> Map Operator Tree: - xx:x - TableScan - alias: x - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - outputColumnNames: _col0 - Position of Big Table: 0 - Select Operator - expressions: - expr: _col0 - type: string - outputColumnNames: _col0 - Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string - mode: hash - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Local Work: - Map Reduce Local Work - Stage: Stage-0 Fetch Operator limit: -1 @@ -5122,16 +5090,15 @@ ABSTRACT SYNTAX TREE: 
(TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src1) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) xx) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) value) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL x) value)))) yy) (= (. (TOK_TABLE_OR_COL xx) key) (. (TOK_TABLE_OR_COL yy) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) cnt))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) value)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) cnt))))) STAGE DEPENDENCIES: - Stage-10 is a root stage - Stage-8 depends on stages: Stage-10 - Stage-3 depends on stages: Stage-8, Stage-9 - Stage-4 depends on stages: Stage-3 Stage-11 is a root stage - Stage-9 depends on stages: Stage-11 + Stage-2 depends on stages: Stage-11 + Stage-10 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-10 + Stage-4 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-10 + Stage: Stage-11 Map Reduce Local Work Alias -> Map Local Tables: yy:y @@ -5151,7 +5118,7 @@ STAGE PLANS: 1 [Column[key]] Position of Big Table: 0 - Stage: Stage-8 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: yy:x @@ -5233,6 +5200,26 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Stage: Stage-10 + Map Reduce Local Work + Alias -> Map Local Tables: + xx:y + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + xx:y + TableScan + alias: y + HashTable Sink Operator + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + Position of Big Table: 0 + Stage: Stage-3 Map Reduce Alias -> Map Operator Tree: @@ -5253,19 +5240,49 @@ STAGE PLANS: type: string expr: _col2 type: bigint - $INTNAME1 - Reduce Output Operator - key expressions: - expr: _col0 - type: string - sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col1 - type: bigint + xx:x + TableScan + alias: x + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Group By Operator + aggregations: + expr: count(1) + bucketGroup: false + keys: + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + 
key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 0 + value expressions: + expr: _col1 + type: bigint + Local Work: + Map Reduce Local Work Reduce Operator Tree: Demux Operator Group By Operator @@ -5379,67 +5396,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - Stage: Stage-11 - Map Reduce Local Work - Alias -> Map Local Tables: - xx:y - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - xx:y - TableScan - alias: y - HashTable Sink Operator - condition expressions: - 0 {key} - 1 - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - Position of Big Table: 0 - - Stage: Stage-9 - Map Reduce - Alias -> Map Operator Tree: - xx:x - TableScan - alias: x - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - outputColumnNames: _col0 - Position of Big Table: 0 - Select Operator - expressions: - expr: _col0 - type: string - outputColumnNames: _col0 - Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string - mode: hash - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Local Work: - Map Reduce Local Work - Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/correlationoptimizer7.q.out ql/src/test/results/clientpositive/correlationoptimizer7.q.out index f8db2bf..7a17edf 100644 --- ql/src/test/results/clientpositive/correlationoptimizer7.q.out +++ ql/src/test/results/clientpositive/correlationoptimizer7.q.out @@ -19,11 +19,9 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-9 is a root stage - Stage-7 depends on stages: Stage-9 - Stage-4 depends on stages: Stage-7 + Stage-4 depends on stages: Stage-9 Stage-8 depends on stages: Stage-4 - Stage-6 depends on stages: Stage-8 - Stage-2 depends on stages: Stage-6 + Stage-2 depends on stages: Stage-8 Stage-0 is a root stage STAGE PLANS: @@ -47,7 +45,7 @@ STAGE PLANS: 1 [Column[key]] Position of Big Table: 0 - Stage: Stage-7 + Stage: Stage-4 Map Reduce Alias -> Map Operator Tree: xx:x @@ -79,31 +77,20 @@ STAGE PLANS: type: string mode: hash outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: bigint Local Work: Map Reduce Local Work - - Stage: Stage-4 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Reduce Output Operator - key expressions: - expr: _col0 - type: string - sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint Reduce Operator Tree: Group By Operator aggregations: @@ -148,7 +135,7 @@ STAGE PLANS: 1 [Column[key]] Position of Big Table: 0 - Stage: Stage-6 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: $INTNAME @@ -175,40 +162,29 @@ STAGE PLANS: expr: 
_col3 type: string outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + expr: _col2 + type: string + expr: _col3 + type: string + sort order: ++++ + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + expr: _col2 + type: string + expr: _col3 + type: string Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: string - sort order: ++++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: string Reduce Operator Tree: Extract File Output Operator @@ -279,8 +255,7 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-6 is a root stage - Stage-5 depends on stages: Stage-6 - Stage-1 depends on stages: Stage-5 + Stage-1 depends on stages: Stage-6 Stage-2 depends on stages: Stage-1 Stage-0 is a root stage @@ -305,7 +280,7 @@ STAGE PLANS: 1 [Column[key]] Position of Big Table: 0 - Stage: Stage-5 + Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: xx:x @@ -337,31 +312,18 @@ STAGE PLANS: type: string mode: hash outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - $INTNAME - Reduce Output Operator - key expressions: - expr: _col0 - type: string - sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col1 - type: bigint + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 0 + value expressions: + expr: _col1 + type: bigint yy TableScan alias: yy @@ -379,6 +341,8 @@ STAGE PLANS: type: string expr: value type: string + Local Work: + Map Reduce Local Work Reduce Operator Tree: Demux Operator Group By Operator @@ -527,13 +491,9 @@ POSTHOOK: Input: default@src1 PREHOOK: query: -- Without correlation optimizer, we will have 3 MR jobs. -- The first one is a MapJoin and Aggregation (in the Reduce Phase). -- The second one is another MapJoin. The third one is for ordering. --- With the correlation optimizer, right now, we still have --- 3 MR jobs. The first one is a MapJoin and the map-side aggregation (a map-only job). --- The second one have the reduce-side aggregation and the second join. --- The third one is for ordering. --- Although we have turned on hive.optimize.mapjoin.mapreduce, that optimizer --- can not handle the case that the MR job (the one which a map-only job will be merged in) --- has multiple inputs. We should improve that optimizer. +-- With the correlation optimizer, right now, we have +-- 2 MR jobs. The first one will evaluate the sub-query xx and the join of +-- xx and yy. The second one will do the ORDER BY. 
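A hedged reproduction sketch for the two-job plan described in the new comment, assuming a build that includes this patch, the standard src/src1 test tables, and the correlation-optimizer flag hive.optimize.correlation; the query is an analogue of the test query, not a verbatim copy, and exact stage numbers may differ:

SET hive.auto.convert.join=true;
SET hive.optimize.correlation=true;
-- A query of the same shape: a map-joined, grouped sub-query joined again
-- on its group-by key, then globally ordered.
EXPLAIN
SELECT xx.key, xx.cnt, yy.key, yy.value
FROM (SELECT x.key AS key, count(1) AS cnt
      FROM src1 x JOIN src y ON (x.key = y.key)
      GROUP BY x.key) xx
JOIN src1 yy ON (xx.key = yy.key)
ORDER BY xx.key, xx.cnt, yy.key, yy.value;
-- Expected shape after this patch: STAGE DEPENDENCIES lists two Map Reduce
-- stages, one carrying both map joins plus the shared aggregation shuffle,
-- and one performing the final ORDER BY.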
EXPLAIN SELECT xx.key, xx.cnt, yy.key, yy.value FROM (SELECT x.key AS key, count(1) AS cnt @@ -545,13 +505,9 @@ PREHOOK: type: QUERY POSTHOOK: query: -- Without correlation optimizer, we will have 3 MR jobs. -- The first one is a MapJoin and Aggregation (in the Reduce Phase). -- The second one is another MapJoin. The third one is for ordering. --- With the correlation optimizer, right now, we still have --- 3 MR jobs. The first one is a MapJoin and the map-side aggregation (a map-only job). --- The second one have the reduce-side aggregation and the second join. --- The third one is for ordering. --- Although we have turned on hive.optimize.mapjoin.mapreduce, that optimizer --- can not handle the case that the MR job (the one which a map-only job will be merged in) --- has multiple inputs. We should improve that optimizer. +-- With the correlation optimizer, right now, we have +-- 2 MR jobs. The first one will evaluate the sub-query xx and the join of +-- xx and yy. The second one will do the ORDER BY. EXPLAIN SELECT xx.key, xx.cnt, yy.key, yy.value FROM (SELECT x.key AS key, count(1) AS cnt @@ -565,9 +521,9 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-9 is a root stage - Stage-7 depends on stages: Stage-9 - Stage-8 depends on stages: Stage-7 - Stage-6 depends on stages: Stage-8 + Stage-4 depends on stages: Stage-9 + Stage-8 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-8 Stage-0 is a root stage STAGE PLANS: @@ -591,7 +547,7 @@ STAGE PLANS: 1 [Column[key]] Position of Big Table: 0 - Stage: Stage-7 + Stage: Stage-4 Map Reduce Alias -> Map Operator Tree: xx:x @@ -681,7 +637,7 @@ STAGE PLANS: 1 [Column[key]] Position of Big Table: 0 - Stage: Stage-6 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: $INTNAME @@ -801,8 +757,7 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-6 is a root stage - Stage-5 depends on stages: Stage-6 - Stage-1 depends on stages: Stage-5 + Stage-1 depends on stages: Stage-6 Stage-2 depends on stages: Stage-1 Stage-0 is a root stage @@ -827,7 +782,7 @@ STAGE PLANS: 1 [Column[key]] Position of Big Table: 0 - Stage: Stage-5 + Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: xx:x @@ -859,31 +814,18 @@ STAGE PLANS: type: string mode: hash outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - $INTNAME - Reduce Output Operator - key expressions: - expr: _col0 - type: string - sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col1 - type: bigint + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 0 + value expressions: + expr: _col1 + type: bigint yy TableScan alias: yy @@ -901,6 +843,8 @@ STAGE PLANS: type: string expr: value type: string + Local Work: + Map Reduce Local Work Reduce Operator Tree: Demux Operator Group By Operator diff --git ql/src/test/results/clientpositive/join28.q.out ql/src/test/results/clientpositive/join28.q.out index 60165e2..3652201 100644 --- ql/src/test/results/clientpositive/join28.q.out +++ ql/src/test/results/clientpositive/join28.q.out @@ -28,8 +28,8 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-8 is a root stage - Stage-7 depends on stages: Stage-8 - 
Stage-0 depends on stages: Stage-7 + Stage-6 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-6 Stage-3 depends on stages: Stage-0 STAGE PLANS: @@ -68,7 +68,7 @@ STAGE PLANS: 1 [Column[key]] Position of Big Table: 0 - Stage: Stage-7 + Stage: Stage-6 Map Reduce Alias -> Map Operator Tree: subq:y diff --git ql/src/test/results/clientpositive/join32.q.out ql/src/test/results/clientpositive/join32.q.out index 41d183b..ab8adf1 100644 --- ql/src/test/results/clientpositive/join32.q.out +++ ql/src/test/results/clientpositive/join32.q.out @@ -24,8 +24,8 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-7 is a root stage - Stage-6 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-6 + Stage-5 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 STAGE PLANS: @@ -66,7 +66,7 @@ STAGE PLANS: 1 [Column[value]] Position of Big Table: 0 - Stage: Stage-6 + Stage: Stage-5 Map Reduce Alias -> Map Operator Tree: y @@ -219,6 +219,52 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src1 name: default.src1 +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + bucket_count -1 + columns key,value + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 0 + partition_columns ds/hr + rawDataSize 0 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 4 + numPartitions 4 + numRows 0 + partition_columns ds/hr + rawDataSize 0 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 23248 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: /src [y] diff --git ql/src/test/results/clientpositive/join33.q.out ql/src/test/results/clientpositive/join33.q.out index 41d183b..ab8adf1 100644 --- ql/src/test/results/clientpositive/join33.q.out +++ ql/src/test/results/clientpositive/join33.q.out @@ -24,8 +24,8 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-7 is a root stage - Stage-6 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-6 + Stage-5 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 STAGE PLANS: @@ -66,7 +66,7 @@ STAGE PLANS: 1 [Column[value]] Position of Big Table: 0 - Stage: Stage-6 + Stage: Stage-5 Map Reduce Alias -> Map Operator Tree: y @@ -219,6 +219,52 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src1 name: default.src1 +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + bucket_count -1 + columns key,value + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 0 + partition_columns ds/hr + rawDataSize 0 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 4 + numPartitions 4 + numRows 0 + partition_columns ds/hr + rawDataSize 0 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 23248 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: /src [y] diff --git ql/src/test/results/clientpositive/join_star.q.out ql/src/test/results/clientpositive/join_star.q.out index 797b892..6246d36 100644 --- ql/src/test/results/clientpositive/join_star.q.out +++ ql/src/test/results/clientpositive/join_star.q.out @@ -189,7 +189,7 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-6 is a root stage - Stage-5 depends on stages: Stage-6 + Stage-4 depends on stages: Stage-6 Stage-0 is a root stage STAGE PLANS: @@ -228,7 +228,7 @@ STAGE PLANS: 1 [Column[f3]] Position of Big Table: 0 - Stage: Stage-5 + Stage: Stage-4 Map Reduce Alias -> Map Operator Tree: fact @@ -314,7 +314,7 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-6 is a root stage - Stage-5 depends on stages: Stage-6 + Stage-4 depends on stages: Stage-6 Stage-0 is a root stage STAGE PLANS: @@ -353,7 +353,7 @@ STAGE PLANS: 1 [Column[f3]] Position of Big Table: 0 - Stage: Stage-5 + Stage: Stage-4 Map Reduce Alias -> Map Operator Tree: fact @@ -439,7 +439,7 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-6 is a root stage - Stage-5 depends on stages: Stage-6 + Stage-4 depends on stages: Stage-6 Stage-0 is a root stage STAGE PLANS: @@ -478,7 +478,7 @@ STAGE PLANS: 1 [Column[f3]] Position of Big Table: 0 - Stage: Stage-5 + Stage: Stage-4 Map Reduce Alias -> Map Operator Tree: fact @@ -580,7 +580,7 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-18 is a root stage - Stage-17 depends on stages: Stage-18 + Stage-12 depends on stages: Stage-18 Stage-0 is a root stage STAGE PLANS: @@ -698,7 +698,7 @@ STAGE PLANS: 1 [Column[f13]] Position of Big Table: 0 - Stage: Stage-17 + Stage: Stage-12 Map Reduce Alias -> Map Operator Tree: fact diff --git ql/src/test/results/clientpositive/mapjoin_filter_on_outerjoin.q.out ql/src/test/results/clientpositive/mapjoin_filter_on_outerjoin.q.out index 0fab62f..5bf3d2a 100644 --- ql/src/test/results/clientpositive/mapjoin_filter_on_outerjoin.q.out +++ ql/src/test/results/clientpositive/mapjoin_filter_on_outerjoin.q.out @@ -254,8 +254,7 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-6 is a root stage - Stage-5 depends on stages: Stage-6 - Stage-2 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-6 Stage-0 is a root stage STAGE PLANS: @@ -314,7 +313,7 @@ STAGE PLANS: 2 
[Column[key]] Position of Big Table: 2 - Stage: Stage-5 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: src3 @@ -358,42 +357,31 @@ STAGE PLANS: expr: _col9 type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col2 + type: string + expr: _col4 + type: string + sort order: +++ + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + expr: _col5 + type: string Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col2 - type: string - expr: _col4 - type: string - sort order: +++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string Reduce Operator Tree: Extract File Output Operator diff --git ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out index 2f5f613..5dac693 100644 --- ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out +++ ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out @@ -11,7 +11,7 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-7 is a root stage - Stage-6 depends on stages: Stage-7 + Stage-5 depends on stages: Stage-7 Stage-0 is a root stage STAGE PLANS: @@ -50,7 +50,7 @@ STAGE PLANS: 1 [Column[key]] Position of Big Table: 0 - Stage: Stage-6 + Stage: Stage-5 Map Reduce Alias -> Map Operator Tree: srcpart @@ -108,8 +108,7 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-8 is a root stage - Stage-7 depends on stages: Stage-8 - Stage-3 depends on stages: Stage-7 + Stage-3 depends on stages: Stage-8 Stage-0 is a root stage STAGE PLANS: @@ -148,7 +147,7 @@ STAGE PLANS: 1 [Column[key]] Position of Big Table: 0 - Stage: Stage-7 + Stage: Stage-3 Map Reduce Alias -> Map Operator Tree: srcpart @@ -192,31 +191,20 @@ STAGE PLANS: type: string mode: hash outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: bigint Local Work: Map Reduce Local Work - - Stage: Stage-3 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Reduce Output Operator - key expressions: - expr: _col0 - type: string - sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint Reduce Operator Tree: Group By Operator aggregations: diff --git ql/src/test/results/clientpositive/mapjoin_subquery.q.out ql/src/test/results/clientpositive/mapjoin_subquery.q.out index 8243c2c..ae896bf 100644 --- ql/src/test/results/clientpositive/mapjoin_subquery.q.out +++ ql/src/test/results/clientpositive/mapjoin_subquery.q.out @@ -21,7 
+21,7 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-7 is a root stage - Stage-6 depends on stages: Stage-7 + Stage-5 depends on stages: Stage-7 Stage-0 is a root stage STAGE PLANS: @@ -60,7 +60,7 @@ STAGE PLANS: 1 [Column[key]] Position of Big Table: 0 - Stage: Stage-6 + Stage: Stage-5 Map Reduce Alias -> Map Operator Tree: subq:y @@ -268,8 +268,7 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-8 is a root stage - Stage-7 depends on stages: Stage-8 - Stage-3 depends on stages: Stage-7 + Stage-3 depends on stages: Stage-8 Stage-0 is a root stage STAGE PLANS: @@ -308,7 +307,7 @@ STAGE PLANS: 1 [Column[key]] Position of Big Table: 0 - Stage: Stage-7 + Stage: Stage-3 Map Reduce Alias -> Map Operator Tree: subq:y @@ -350,32 +349,21 @@ STAGE PLANS: expr: _col5 type: string outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string Local Work: Map Reduce Local Work - - Stage: Stage-3 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string Reduce Operator Tree: Extract File Output Operator diff --git ql/src/test/results/clientpositive/mapjoin_subquery2.q.out ql/src/test/results/clientpositive/mapjoin_subquery2.q.out index 292abe4..b705436 100644 --- ql/src/test/results/clientpositive/mapjoin_subquery2.q.out +++ ql/src/test/results/clientpositive/mapjoin_subquery2.q.out @@ -72,7 +72,7 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-7 is a root stage - Stage-6 depends on stages: Stage-7 + Stage-5 depends on stages: Stage-7 Stage-0 is a root stage STAGE PLANS: @@ -111,7 +111,7 @@ STAGE PLANS: 1 [Column[id]] Position of Big Table: 0 - Stage: Stage-6 + Stage: Stage-5 Map Reduce Alias -> Map Operator Tree: subq:x diff --git ql/src/test/results/clientpositive/mapjoin_test_outer.q.out ql/src/test/results/clientpositive/mapjoin_test_outer.q.out index 37817d9..accfe50 100644 --- ql/src/test/results/clientpositive/mapjoin_test_outer.q.out +++ ql/src/test/results/clientpositive/mapjoin_test_outer.q.out @@ -1164,8 +1164,7 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-6 is a root stage - Stage-5 depends on stages: Stage-6 - Stage-2 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-6 Stage-0 is a root stage STAGE PLANS: @@ -1208,7 +1207,7 @@ STAGE PLANS: 2 [Column[key]] Position of Big Table: 1 - Stage: Stage-5 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: src2 @@ -1244,48 +1243,37 @@ STAGE PLANS: expr: _col9 type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + expr: _col5 + type: string + sort order: ++++++ + tag: -1 + value expressions: + expr: 
_col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + expr: _col5 + type: string Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - sort order: ++++++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string Reduce Operator Tree: Extract File Output Operator diff --git ql/src/test/results/clientpositive/multiMapJoin1.q.out ql/src/test/results/clientpositive/multiMapJoin1.q.out index a3f5c53..5dea8f0 100644 --- ql/src/test/results/clientpositive/multiMapJoin1.q.out +++ ql/src/test/results/clientpositive/multiMapJoin1.q.out @@ -130,8 +130,7 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-7 is a root stage - Stage-6 depends on stages: Stage-7 - Stage-2 depends on stages: Stage-6 + Stage-2 depends on stages: Stage-7 Stage-0 is a root stage STAGE PLANS: @@ -170,7 +169,7 @@ STAGE PLANS: 1 [Column[value]] Position of Big Table: 0 - Stage: Stage-6 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: firstjoin:bigtbl @@ -211,25 +210,14 @@ STAGE PLANS: bucketGroup: false mode: hash outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint Reduce Operator Tree: Group By Operator aggregations: @@ -318,8 +306,7 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-7 is a root stage - Stage-6 depends on stages: Stage-7 - Stage-2 depends on stages: Stage-6 + Stage-2 depends on stages: Stage-7 Stage-0 is a root stage STAGE PLANS: @@ -358,7 +345,7 @@ STAGE PLANS: 1 [Column[value]] Position of Big Table: 0 - Stage: Stage-6 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: firstjoin:bigtbl @@ -399,25 +386,14 @@ STAGE PLANS: bucketGroup: false mode: hash outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint Reduce Operator Tree: Group By Operator aggregations: @@ -447,7 +423,7 @@ PREHOOK: query: select count(*) FROM bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 on (bigTbl.key = smallTbl1.key) ) firstjoin -JOIN +JOIN smallTbl2 on (firstjoin.value1 = smallTbl2.value) PREHOOK: type: QUERY PREHOOK: Input: default@bigtbl @@ -459,7 +435,7 @@ POSTHOOK: query: select count(*) 
FROM bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 on (bigTbl.key = smallTbl1.key) ) firstjoin -JOIN +JOIN smallTbl2 on (firstjoin.value1 = smallTbl2.value) POSTHOOK: type: QUERY POSTHOOK: Input: default@bigtbl @@ -510,8 +486,8 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-7 is a root stage - Stage-6 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-6 + Stage-2 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-7 @@ -549,7 +525,7 @@ STAGE PLANS: 1 [Column[value]] Position of Big Table: 0 - Stage: Stage-6 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: firstjoin:bigtbl @@ -647,7 +623,7 @@ select count(*) FROM bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 on (bigTbl.key = smallTbl1.key) ) firstjoin - JOIN + JOIN smallTbl2 on (firstjoin.value1 = smallTbl2.value) group by smallTbl2.key PREHOOK: type: QUERY @@ -660,7 +636,7 @@ select count(*) FROM bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 on (bigTbl.key = smallTbl1.key) ) firstjoin - JOIN + JOIN smallTbl2 on (firstjoin.value1 = smallTbl2.value) group by smallTbl2.key POSTHOOK: type: QUERY @@ -1414,8 +1390,8 @@ POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 1660 -PREHOOK: query: -- join with 4 tables on different keys is also executed as a single MR job, --- So, overall two jobs - one for multi-way join and one for count(*) +PREHOOK: query: -- Overall we will have a single MR job. The join with 4 tables will be on +-- the Map side. explain select count(*) FROM ( @@ -1426,13 +1402,13 @@ select count(*) FROM FROM bigTbl JOIN smallTbl1 on (bigTbl.key1 = smallTbl1.key) ) firstjoin - JOIN + JOIN smallTbl2 on (firstjoin.value1 = smallTbl2.value) ) secondjoin JOIN smallTbl3 on (secondjoin.key2 = smallTbl3.key) PREHOOK: type: QUERY -POSTHOOK: query: -- join with 4 tables on different keys is also executed as a single MR job, --- So, overall two jobs - one for multi-way join and one for count(*) +POSTHOOK: query: -- Overall we will have a single MR job. The join with 4 tables will be on +-- the Map side.
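The single-job plan asserted in this updated comment hinges on the no-conditional-task map-join settings. A minimal sketch of those knobs follows; the configuration names are the real HiveConf flags, while the threshold value is only an example:

SET hive.auto.convert.join=true;
SET hive.auto.convert.join.noconditionaltask=true;
-- Example threshold: the combined size, in bytes, of all small tables that
-- may be map-joined within a single map task.
SET hive.auto.convert.join.noconditionaltask.size=10000000;
-- With every small table under the threshold, EXPLAIN on the four-table
-- chain below should show one root local-work stage that builds all three
-- hash tables and a single Map Reduce stage running the chained Map Join
-- Operators together with the count(*) aggregation.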
explain select count(*) FROM ( @@ -1443,7 +1419,7 @@ select count(*) FROM FROM bigTbl JOIN smallTbl1 on (bigTbl.key1 = smallTbl1.key) ) firstjoin - JOIN + JOIN smallTbl2 on (firstjoin.value1 = smallTbl2.value) ) secondjoin JOIN smallTbl3 on (secondjoin.key2 = smallTbl3.key) @@ -1464,8 +1440,7 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-11 is a root stage - Stage-10 depends on stages: Stage-11 - Stage-4 depends on stages: Stage-10 + Stage-4 depends on stages: Stage-11 Stage-0 is a root stage STAGE PLANS: @@ -1519,266 +1494,9 @@ STAGE PLANS: 1 [Column[key]] Position of Big Table: 0 - Stage: Stage-10 - Map Reduce - Alias -> Map Operator Tree: - secondjoin:firstjoin:bigtbl - TableScan - alias: bigtbl - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key2} {value} - 1 - handleSkewJoin: false - keys: - 0 [Column[key1]] - 1 [Column[key]] - outputColumnNames: _col1, _col2 - Position of Big Table: 0 - Select Operator - expressions: - expr: _col1 - type: string - expr: _col2 - type: string - outputColumnNames: _col1, _col2 - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col1} - 1 - handleSkewJoin: false - keys: - 0 [Column[_col2]] - 1 [Column[value]] - outputColumnNames: _col1 - Position of Big Table: 0 - Select Operator - expressions: - expr: _col1 - type: string - outputColumnNames: _col1 - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - handleSkewJoin: false - keys: - 0 [Column[_col1]] - 1 [Column[key]] - Position of Big Table: 0 - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Local Work: - Map Reduce Local Work - Stage: Stage-4 Map Reduce Alias -> Map Operator Tree: -#### A masked pattern was here #### - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Reduce Operator Tree: - Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - mode: mergepartial - outputColumnNames: _col0 - Select Operator - expressions: - expr: _col0 - type: bigint - outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - Stage: Stage-0 - Fetch Operator - limit: -1 - - -PREHOOK: query: select count(*) FROM - ( - SELECT firstjoin.key1 as key1, firstjoin.key2 as key2, smallTbl2.key as key3, - firstjoin.value1 as value1, firstjoin.value2 as value2 FROM - (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2, - bigTbl.value as value1, bigTbl.value as value2 - FROM bigTbl JOIN smallTbl1 - on (bigTbl.key1 = smallTbl1.key) - ) firstjoin - JOIN - smallTbl2 on (firstjoin.value1 = smallTbl2.value) - ) secondjoin - JOIN smallTbl3 on (secondjoin.key2 = smallTbl3.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@bigtbl -PREHOOK: Input: default@smalltbl1 -PREHOOK: Input: default@smalltbl2 -PREHOOK: Input: default@smalltbl3 -#### A masked pattern was here #### -POSTHOOK: query: select count(*) FROM - ( - SELECT firstjoin.key1 as key1, firstjoin.key2 as key2, smallTbl2.key as key3, - firstjoin.value1 as value1, firstjoin.value2 as value2 FROM - (SELECT bigTbl.key1 as key1, 
bigTbl.key2 as key2, - bigTbl.value as value1, bigTbl.value as value2 - FROM bigTbl JOIN smallTbl1 - on (bigTbl.key1 = smallTbl1.key) - ) firstjoin - JOIN - smallTbl2 on (firstjoin.value1 = smallTbl2.value) - ) secondjoin - JOIN smallTbl3 on (secondjoin.key2 = smallTbl3.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@bigtbl -POSTHOOK: Input: default@smalltbl1 -POSTHOOK: Input: default@smalltbl2 -POSTHOOK: Input: default@smalltbl3 -#### A masked pattern was here #### -POSTHOOK: Lineage: bigtbl.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: bigtbl.key1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: bigtbl.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), 
(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -1660 -PREHOOK: query: -- Now run the above query with M-MR optimization --- This should be a single MR job end-to-end. -explain -select count(*) FROM - ( - SELECT firstjoin.key1 as key1, firstjoin.key2 as key2, smallTbl2.key as key3, - firstjoin.value1 as value1, firstjoin.value2 as value2 FROM - (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2, - bigTbl.value as value1, bigTbl.value as value2 - FROM bigTbl JOIN smallTbl1 - on (bigTbl.key1 = smallTbl1.key) - ) firstjoin - JOIN - smallTbl2 on (firstjoin.value1 = smallTbl2.value) - ) secondjoin - JOIN smallTbl3 on (secondjoin.key2 = smallTbl3.key) -PREHOOK: type: QUERY -POSTHOOK: query: -- Now run the above query with M-MR optimization --- This should be a single MR job end-to-end. -explain -select count(*) FROM - ( - SELECT firstjoin.key1 as key1, firstjoin.key2 as key2, smallTbl2.key as key3, - firstjoin.value1 as value1, firstjoin.value2 as value2 FROM - (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2, - bigTbl.value as value1, bigTbl.value as value2 - FROM bigTbl JOIN smallTbl1 - on (bigTbl.key1 = smallTbl1.key) - ) firstjoin - JOIN - smallTbl2 on (firstjoin.value1 = smallTbl2.value) - ) secondjoin - JOIN smallTbl3 on (secondjoin.key2 = smallTbl3.key) -POSTHOOK: type: QUERY -POSTHOOK: Lineage: bigtbl.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: bigtbl.key1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] 
-POSTHOOK: Lineage: bigtbl.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bigTbl)) (TOK_TABREF (TOK_TABNAME smallTbl1)) (= (. (TOK_TABLE_OR_COL bigTbl) key1) (. (TOK_TABLE_OR_COL smallTbl1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) key1) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) key2) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value2)))) firstjoin) (TOK_TABREF (TOK_TABNAME smallTbl2)) (= (. (TOK_TABLE_OR_COL firstjoin) value1) (. (TOK_TABLE_OR_COL smallTbl2) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL firstjoin) key1) key1) (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL firstjoin) key2) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL smallTbl2) key) key3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL firstjoin) value1) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL firstjoin) value2) value2)))) secondjoin) (TOK_TABREF (TOK_TABNAME smallTbl3)) (= (. (TOK_TABLE_OR_COL secondjoin) key2) (. (TOK_TABLE_OR_COL smallTbl3) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - -STAGE DEPENDENCIES: - Stage-11 is a root stage - Stage-10 depends on stages: Stage-11 - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-11 - Map Reduce Local Work - Alias -> Map Local Tables: - secondjoin:firstjoin:smalltbl1 - Fetch Operator - limit: -1 - secondjoin:smalltbl2 - Fetch Operator - limit: -1 - smalltbl3 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - secondjoin:firstjoin:smalltbl1 - TableScan - alias: smalltbl1 - HashTable Sink Operator - condition expressions: - 0 {key2} {value} - 1 - handleSkewJoin: false - keys: - 0 [Column[key1]] - 1 [Column[key]] - Position of Big Table: 0 - secondjoin:smalltbl2 - TableScan - alias: smalltbl2 - HashTable Sink Operator - condition expressions: - 0 {_col1} - 1 - handleSkewJoin: false - keys: - 0 [Column[_col2]] - 1 [Column[value]] - Position of Big Table: 0 - smalltbl3 - TableScan - alias: smalltbl3 - HashTable Sink Operator - condition expressions: - 0 - 1 - handleSkewJoin: false - keys: - 0 [Column[_col1]] - 1 [Column[key]] - Position of Big Table: 0 - - Stage: Stage-10 - Map Reduce - Alias -> Map Operator Tree: secondjoin:firstjoin:bigtbl TableScan alias: bigtbl @@ -1877,7 +1595,7 @@ PREHOOK: query: select count(*) FROM FROM bigTbl JOIN smallTbl1 on (bigTbl.key1 = smallTbl1.key) ) firstjoin - JOIN + JOIN smallTbl2 on (firstjoin.value1 = smallTbl2.value) ) secondjoin JOIN smallTbl3 on (secondjoin.key2 = smallTbl3.key) @@ -1896,7 +1614,7 @@ POSTHOOK: query: select count(*) FROM FROM bigTbl JOIN smallTbl1 on (bigTbl.key1 = smallTbl1.key) ) firstjoin - JOIN + JOIN smallTbl2 on (firstjoin.value1 = smallTbl2.value) ) secondjoin JOIN smallTbl3 on (secondjoin.key2 = smallTbl3.key) diff --git ql/src/test/results/clientpositive/multiMapJoin2.q.out ql/src/test/results/clientpositive/multiMapJoin2.q.out new file mode 100644 index 0000000..e0eb2e1 --- /dev/null +++ ql/src/test/results/clientpositive/multiMapJoin2.q.out @@ -0,0 +1,2515 @@ +PREHOOK: query: -- we will generate one MR job. +EXPLAIN +SELECT tmp.key +FROM (SELECT x1.key AS key FROM src x1 JOIN src1 y1 ON (x1.key = y1.key) + UNION ALL + SELECT x2.key AS key FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)) tmp +ORDER BY tmp.key +PREHOOK: type: QUERY +POSTHOOK: query: -- we will generate one MR job. +EXPLAIN +SELECT tmp.key +FROM (SELECT x1.key AS key FROM src x1 JOIN src1 y1 ON (x1.key = y1.key) + UNION ALL + SELECT x2.key AS key FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)) tmp +ORDER BY tmp.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x1) (TOK_TABREF (TOK_TABNAME src1) y1) (= (. (TOK_TABLE_OR_COL x1) key) (. (TOK_TABLE_OR_COL y1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) key) key)))) (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x2) (TOK_TABREF (TOK_TABNAME src1) y2) (= (. (TOK_TABLE_OR_COL x2) key) (. (TOK_TABLE_OR_COL y2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL x2) key) key))))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmp) key))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL tmp) key))))) + +STAGE DEPENDENCIES: + Stage-8 is a root stage + Stage-2 depends on stages: Stage-8 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + null-subquery1:tmp-subquery1:y1 + Fetch Operator + limit: -1 + null-subquery2:tmp-subquery2:y2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + null-subquery1:tmp-subquery1:y1 + TableScan + alias: y1 + HashTable Sink Operator + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + Position of Big Table: 0 + null-subquery2:tmp-subquery2:y2 + TableScan + alias: y2 + HashTable Sink Operator + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + Position of Big Table: 0 + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + null-subquery1:tmp-subquery1:x1 + TableScan + alias: x1 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Union + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: string + null-subquery2:tmp-subquery2:x2 + TableScan + alias: x2 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Union + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: string + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT tmp.key +FROM (SELECT x1.key AS key FROM src x1 JOIN src1 y1 ON (x1.key = y1.key) + UNION ALL + SELECT x2.key AS key FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)) tmp +ORDER BY tmp.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT tmp.key +FROM (SELECT x1.key AS key FROM src x1 JOIN src1 y1 ON (x1.key = y1.key) + UNION ALL + SELECT x2.key AS key FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)) tmp +ORDER BY tmp.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +128 +128 +128 +128 +128 +128 +146 +146 +146 +146 +150 +150 +213 +213 +213 +213 +224 +224 +224 +224 +238 +238 +238 +238 +255 +255 +255 +255 +273 +273 +273 +273 +273 +273 +278 +278 +278 +278 +311 +311 +311 +311 +311 +311 +369 +369 +369 +369 +369 +369 +401 +401 
+401 +401 +401 +401 +401 +401 +401 +401 +406 +406 +406 +406 +406 +406 +406 +406 +66 +66 +98 +98 +98 +98 +PREHOOK: query: -- Check if the total size of local tables will be +-- larger than the limit that +-- we set through hive.auto.convert.join.noconditionaltask.size (right now, it is +-- 400 bytes). If so, do not merge. +-- For this query, we will merge the MapJoin of x2 and y2 into the MR job +-- for UNION ALL and ORDER BY. But the MapJoin of x1 and y1 will not be merged +-- into that MR job. +EXPLAIN +SELECT tmp.key +FROM (SELECT x1.key AS key FROM src x1 JOIN src1 y1 ON (x1.key = y1.key) + UNION ALL + SELECT x2.key AS key FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)) tmp +ORDER BY tmp.key +PREHOOK: type: QUERY +POSTHOOK: query: -- Check if the total size of local tables will be +-- larger than the limit that +-- we set through hive.auto.convert.join.noconditionaltask.size (right now, it is +-- 400 bytes). If so, do not merge. +-- For this query, we will merge the MapJoin of x2 and y2 into the MR job +-- for UNION ALL and ORDER BY. But the MapJoin of x1 and y1 will not be merged +-- into that MR job. +EXPLAIN +SELECT tmp.key +FROM (SELECT x1.key AS key FROM src x1 JOIN src1 y1 ON (x1.key = y1.key) + UNION ALL + SELECT x2.key AS key FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)) tmp +ORDER BY tmp.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x1) (TOK_TABREF (TOK_TABNAME src1) y1) (= (. (TOK_TABLE_OR_COL x1) key) (. (TOK_TABLE_OR_COL y1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) key) key)))) (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x2) (TOK_TABREF (TOK_TABNAME src1) y2) (= (. (TOK_TABLE_OR_COL x2) key) (. (TOK_TABLE_OR_COL y2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x2) key) key))))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmp) key))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (.
(TOK_TABLE_OR_COL tmp) key))))) + +STAGE DEPENDENCIES: + Stage-9 is a root stage + Stage-7 depends on stages: Stage-9 + Stage-8 depends on stages: Stage-7 + Stage-2 depends on stages: Stage-8 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-9 + Map Reduce Local Work + Alias -> Map Local Tables: + null-subquery1:tmp-subquery1:y1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + null-subquery1:tmp-subquery1:y1 + TableScan + alias: y1 + HashTable Sink Operator + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + Position of Big Table: 0 + + Stage: Stage-7 + Map Reduce + Alias -> Map Operator Tree: + null-subquery1:tmp-subquery1:x1 + TableScan + alias: x1 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Local Work: + Map Reduce Local Work + + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + null-subquery2:tmp-subquery2:y2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + null-subquery2:tmp-subquery2:y2 + TableScan + alias: y2 + HashTable Sink Operator + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + Position of Big Table: 0 + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + TableScan + Union + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: string + null-subquery2:tmp-subquery2:x2 + TableScan + alias: x2 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Union + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: string + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT tmp.key +FROM (SELECT x1.key AS key FROM src x1 JOIN src1 y1 ON (x1.key = y1.key) + UNION ALL + SELECT x2.key AS key FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)) tmp +ORDER BY tmp.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT tmp.key +FROM (SELECT x1.key AS key FROM src x1 JOIN src1 y1 ON (x1.key = y1.key) + UNION ALL + SELECT x2.key AS key FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)) tmp +ORDER BY tmp.key +POSTHOOK: type: QUERY +POSTHOOK: 
Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +128 +128 +128 +128 +128 +128 +146 +146 +146 +146 +150 +150 +213 +213 +213 +213 +224 +224 +224 +224 +238 +238 +238 +238 +255 +255 +255 +255 +273 +273 +273 +273 +273 +273 +278 +278 +278 +278 +311 +311 +311 +311 +311 +311 +369 +369 +369 +369 +369 +369 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +406 +406 +406 +406 +406 +406 +406 +406 +66 +66 +98 +98 +98 +98 +PREHOOK: query: -- We will use two jobs. +-- We will generate one MR job for GROUP BY +-- on x1, one MR job for the MapJoin of x2 and y2, the UNION ALL, and the +-- ORDER BY. +EXPLAIN +SELECT tmp.key +FROM (SELECT x1.key AS key FROM src1 x1 GROUP BY x1.key + UNION ALL + SELECT x2.key AS key FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)) tmp +ORDER BY tmp.key +PREHOOK: type: QUERY +POSTHOOK: query: -- We will use two jobs. +-- We will generate one MR job for GROUP BY +-- on x1, one MR job for the MapJoin of x2 and y2, the UNION ALL, and the +-- ORDER BY. +EXPLAIN +SELECT tmp.key +FROM (SELECT x1.key AS key FROM src1 x1 GROUP BY x1.key + UNION ALL + SELECT x2.key AS key FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)) tmp +ORDER BY tmp.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) x1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) key) key)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x1) key)))) (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x2) (TOK_TABREF (TOK_TABNAME src1) y2) (= (. (TOK_TABLE_OR_COL x2) key) (. (TOK_TABLE_OR_COL y2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x2) key) key))))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmp) key))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (.
(TOK_TABLE_OR_COL tmp) key))))) + +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-6 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-6 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-4 + Map Reduce + Alias -> Map Operator Tree: + null-subquery1:tmp-subquery1:x1 + TableScan + alias: x1 + Select Operator + expressions: + expr: key + type: string + outputColumnNames: key + Group By Operator + bucketGroup: false + keys: + expr: key + type: string + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-6 + Map Reduce Local Work + Alias -> Map Local Tables: + null-subquery2:tmp-subquery2:y2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + null-subquery2:tmp-subquery2:y2 + TableScan + alias: y2 + HashTable Sink Operator + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + Position of Big Table: 0 + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + TableScan + Union + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: string + null-subquery2:tmp-subquery2:x2 + TableScan + alias: x2 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Union + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: string + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT tmp.key +FROM (SELECT x1.key AS key FROM src1 x1 GROUP BY x1.key + UNION ALL + SELECT x2.key AS key FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)) tmp +ORDER BY tmp.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT tmp.key +FROM (SELECT x1.key AS key FROM src1 x1 GROUP BY x1.key + UNION ALL + SELECT x2.key AS key FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)) tmp +ORDER BY tmp.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### + +128 +128 +128 +128 +146 +146 +146 +150 +150 +213 +213 +213 +224 +224 +224 +238 +238 +238 +255 +255 +255 +273 
+273 +273 +273 +278 +278 +278 +311 +311 +311 +311 +369 +369 +369 +369 +401 +401 +401 +401 +401 +401 +406 +406 +406 +406 +406 +66 +66 +98 +98 +98 +PREHOOK: query: -- When Correlation Optimizer is disabled, +-- we will use five jobs. +-- We will generate one MR job to evaluate the sub-query tmp1, +-- one MR job to evaluate the sub-query tmp2, +-- one MR job for the Join of tmp1 and tmp2, +-- one MR job for aggregation on the result of the Join of tmp1 and tmp2, +-- and one MR job for the ORDER BY. +EXPLAIN +SELECT tmp1.key as key, count(*) as cnt +FROM (SELECT x1.key AS key + FROM src x1 JOIN src1 y1 ON (x1.key = y1.key) + GROUP BY x1.key) tmp1 +JOIN (SELECT x2.key AS key + FROM src x2 JOIN src1 y2 ON (x2.key = y2.key) + GROUP BY x2.key) tmp2 +ON (tmp1.key = tmp2.key) +GROUP BY tmp1.key +ORDER BY key, cnt +PREHOOK: type: QUERY +POSTHOOK: query: -- When Correlation Optimizer is disabled, +-- we will use five jobs. +-- We will generate one MR job to evaluate the sub-query tmp1, +-- one MR job to evaluate the sub-query tmp2, +-- one MR job for the Join of tmp1 and tmp2, +-- one MR job for aggregation on the result of the Join of tmp1 and tmp2, +-- and one MR job for the ORDER BY. +EXPLAIN +SELECT tmp1.key as key, count(*) as cnt +FROM (SELECT x1.key AS key + FROM src x1 JOIN src1 y1 ON (x1.key = y1.key) + GROUP BY x1.key) tmp1 +JOIN (SELECT x2.key AS key + FROM src x2 JOIN src1 y2 ON (x2.key = y2.key) + GROUP BY x2.key) tmp2 +ON (tmp1.key = tmp2.key) +GROUP BY tmp1.key +ORDER BY key, cnt +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x1) (TOK_TABREF (TOK_TABNAME src1) y1) (= (. (TOK_TABLE_OR_COL x1) key) (. (TOK_TABLE_OR_COL y1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) key) key)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x1) key)))) tmp1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x2) (TOK_TABREF (TOK_TABNAME src1) y2) (= (. (TOK_TABLE_OR_COL x2) key) (. (TOK_TABLE_OR_COL y2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x2) key) key)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x2) key)))) tmp2) (= (. (TOK_TABLE_OR_COL tmp1) key) (. (TOK_TABLE_OR_COL tmp2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmp1) key) key) (TOK_SELEXPR (TOK_FUNCTIONSTAR count) cnt)) (TOK_GROUPBY (.
(TOK_TABLE_OR_COL tmp1) key)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL cnt))))) + +STAGE DEPENDENCIES: + Stage-17 is a root stage + Stage-2 depends on stages: Stage-17 + Stage-12 depends on stages: Stage-2, Stage-8 , consists of Stage-15, Stage-16, Stage-3 + Stage-15 has a backup stage: Stage-3 + Stage-10 depends on stages: Stage-15 + Stage-4 depends on stages: Stage-3, Stage-10, Stage-11 + Stage-5 depends on stages: Stage-4 + Stage-16 has a backup stage: Stage-3 + Stage-11 depends on stages: Stage-16 + Stage-3 + Stage-18 is a root stage + Stage-8 depends on stages: Stage-18 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-17 + Map Reduce Local Work + Alias -> Map Local Tables: + tmp2:y2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + tmp2:y2 + TableScan + alias: y2 + HashTable Sink Operator + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + Position of Big Table: 0 + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + tmp2:x2 + TableScan + alias: x2 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Group By Operator + bucketGroup: false + keys: + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-12 + Conditional Operator + + Stage: Stage-15 + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME + HashTable Sink Operator + condition expressions: + 0 {_col0} + 1 + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[_col0]] + Position of Big Table: 0 + + Stage: Stage-10 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME1 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[_col0]] + outputColumnNames: _col0 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Local Work: + Map Reduce Local Work + + Stage: Stage-4 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + 
Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-5 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + sort order: ++ + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-16 + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME1 + HashTable Sink Operator + condition expressions: + 0 {_col0} + 1 + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[_col0]] + Position of Big Table: 1 + + Stage: Stage-11 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[_col0]] + outputColumnNames: _col0 + Position of Big Table: 1 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Local Work: + Map Reduce Local Work + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 1 + $INTNAME1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 0 + value expressions: + expr: _col0 + type: string + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 + handleSkewJoin: false + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-18 + Map Reduce Local Work + Alias -> Map Local Tables: + tmp1:y1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator 
Tree: + tmp1:y1 + TableScan + alias: y1 + HashTable Sink Operator + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + Position of Big Table: 0 + + Stage: Stage-8 + Map Reduce + Alias -> Map Operator Tree: + tmp1:x1 + TableScan + alias: x1 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Group By Operator + bucketGroup: false + keys: + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT tmp1.key as key, count(*) as cnt +FROM (SELECT x1.key AS key + FROM src x1 JOIN src1 y1 ON (x1.key = y1.key) + GROUP BY x1.key) tmp1 +JOIN (SELECT x2.key AS key + FROM src x2 JOIN src1 y2 ON (x2.key = y2.key) + GROUP BY x2.key) tmp2 +ON (tmp1.key = tmp2.key) +GROUP BY tmp1.key +ORDER BY key, cnt +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT tmp1.key as key, count(*) as cnt +FROM (SELECT x1.key AS key + FROM src x1 JOIN src1 y1 ON (x1.key = y1.key) + GROUP BY x1.key) tmp1 +JOIN (SELECT x2.key AS key + FROM src x2 JOIN src1 y2 ON (x2.key = y2.key) + GROUP BY x2.key) tmp2 +ON (tmp1.key = tmp2.key) +GROUP BY tmp1.key +ORDER BY key, cnt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +128 1 +146 1 +150 1 +213 1 +224 1 +238 1 +255 1 +273 1 +278 1 +311 1 +369 1 +401 1 +406 1 +66 1 +98 1 +PREHOOK: query: -- When Correlation Optimizer is enabled, +-- we will use two jobs. The first MR job will evaluate sub-queries of tmp1, tmp2, +-- the Join of tmp1 and tmp2, and the aggregation on the result of the Join of +-- tmp1 and tmp2. The second job will do the ORDER BY.
+EXPLAIN +SELECT tmp1.key as key, count(*) as cnt +FROM (SELECT x1.key AS key + FROM src x1 JOIN src1 y1 ON (x1.key = y1.key) + GROUP BY x1.key) tmp1 +JOIN (SELECT x2.key AS key + FROM src x2 JOIN src1 y2 ON (x2.key = y2.key) + GROUP BY x2.key) tmp2 +ON (tmp1.key = tmp2.key) +GROUP BY tmp1.key +ORDER BY key, cnt +PREHOOK: type: QUERY +POSTHOOK: query: -- When Correlation Optimizer is enabled, +-- we will use two jobs. The first MR job will evaluate sub-queries of tmp1, tmp2, +-- the Join of tmp1 and tmp2, and the aggregation on the result of the Join of +-- tmp1 and tmp2. The second job will do the ORDER BY. +EXPLAIN +SELECT tmp1.key as key, count(*) as cnt +FROM (SELECT x1.key AS key + FROM src x1 JOIN src1 y1 ON (x1.key = y1.key) + GROUP BY x1.key) tmp1 +JOIN (SELECT x2.key AS key + FROM src x2 JOIN src1 y2 ON (x2.key = y2.key) + GROUP BY x2.key) tmp2 +ON (tmp1.key = tmp2.key) +GROUP BY tmp1.key +ORDER BY key, cnt +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x1) (TOK_TABREF (TOK_TABNAME src1) y1) (= (. (TOK_TABLE_OR_COL x1) key) (. (TOK_TABLE_OR_COL y1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) key) key)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x1) key)))) tmp1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x2) (TOK_TABREF (TOK_TABNAME src1) y2) (= (. (TOK_TABLE_OR_COL x2) key) (. (TOK_TABLE_OR_COL y2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x2) key) key)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x2) key)))) tmp2) (= (. (TOK_TABLE_OR_COL tmp1) key) (. (TOK_TABLE_OR_COL tmp2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmp1) key) key) (TOK_SELEXPR (TOK_FUNCTIONSTAR count) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL tmp1) key)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL cnt))))) + +STAGE DEPENDENCIES: + Stage-9 is a root stage + Stage-2 depends on stages: Stage-9 + Stage-3 depends on stages: Stage-2 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-9 + Map Reduce Local Work + Alias -> Map Local Tables: + tmp1:y1 + Fetch Operator + limit: -1 + tmp2:y2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + tmp1:y1 + TableScan + alias: y1 + HashTable Sink Operator + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + Position of Big Table: 0 + tmp2:y2 + TableScan + alias: y2 + HashTable Sink Operator + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + Position of Big Table: 0 + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + tmp1:x1 + TableScan + alias: x1 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Group By Operator + bucketGroup: false + keys: + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 0 + tmp2:x2 + TableScan + alias: x2 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Group By Operator + bucketGroup: false + keys: + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 1 + Local Work: + Map Reduce Local Work + Reduce
Operator Tree: + Demux Operator + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Mux Operator + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 + handleSkewJoin: false + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Mux Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: + expr: _col0 + type: string + mode: complete + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Mux Operator + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 + handleSkewJoin: false + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Mux Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: + expr: _col0 + type: string + mode: complete + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + sort order: ++ + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT tmp1.key as key, count(*) as cnt +FROM (SELECT x1.key AS key + FROM src x1 JOIN src1 y1 ON (x1.key = y1.key) + GROUP BY x1.key) tmp1 +JOIN (SELECT x2.key AS key + FROM src x2 JOIN src1 y2 ON (x2.key = y2.key) + GROUP BY x2.key) tmp2 +ON (tmp1.key = tmp2.key) +GROUP BY tmp1.key +ORDER BY key, cnt +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT tmp1.key as key, count(*) as cnt +FROM (SELECT x1.key AS key + FROM src x1 JOIN src1 y1 ON (x1.key = y1.key) + GROUP BY x1.key) tmp1 +JOIN (SELECT x2.key AS key + FROM src x2 JOIN src1 y2 ON (x2.key = y2.key) + GROUP BY x2.key) tmp2 +ON (tmp1.key = tmp2.key) +GROUP BY tmp1.key +ORDER BY key, cnt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +128 1 +146 1 +150 1 +213 1 +224 1 +238 1 +255 1 +273 1 +278 1 +311 1 
+369 1 +401 1 +406 1 +66 1 +98 1 +PREHOOK: query: -- When Correlation Optimizer is disabled, +-- we will use five jobs. +-- We will generate one MR job to evaluate the sub-query tmp1, +-- one MR job to evaluate the sub-query tmp2, +-- one MR job for the Join of tmp1 and tmp2, +-- one MR job for aggregation on the result of the Join of tmp1 and tmp2, +-- and one MR job for the ORDER BY. +EXPLAIN +SELECT tmp1.key as key, count(*) as cnt +FROM (SELECT x1.key AS key + FROM src1 x1 + GROUP BY x1.key) tmp1 +JOIN (SELECT x2.key AS key + FROM src x2 JOIN src1 y2 ON (x2.key = y2.key) + GROUP BY x2.key) tmp2 +ON (tmp1.key = tmp2.key) +GROUP BY tmp1.key +ORDER BY key, cnt +PREHOOK: type: QUERY +POSTHOOK: query: -- When Correlation Optimizer is disabled, +-- we will use five jobs. +-- We will generate one MR job to evaluate the sub-query tmp1, +-- one MR job to evaluate the sub-query tmp2, +-- one MR job for the Join of tmp1 and tmp2, +-- one MR job for aggregation on the result of the Join of tmp1 and tmp2, +-- and one MR job for the ORDER BY. +EXPLAIN +SELECT tmp1.key as key, count(*) as cnt +FROM (SELECT x1.key AS key + FROM src1 x1 + GROUP BY x1.key) tmp1 +JOIN (SELECT x2.key AS key + FROM src x2 JOIN src1 y2 ON (x2.key = y2.key) + GROUP BY x2.key) tmp2 +ON (tmp1.key = tmp2.key) +GROUP BY tmp1.key +ORDER BY key, cnt +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) x1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) key) key)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x1) key)))) tmp1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x2) (TOK_TABREF (TOK_TABNAME src1) y2) (= (. (TOK_TABLE_OR_COL x2) key) (. (TOK_TABLE_OR_COL y2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x2) key) key)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x2) key)))) tmp2) (= (. (TOK_TABLE_OR_COL tmp1) key) (. (TOK_TABLE_OR_COL tmp2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmp1) key) key) (TOK_SELEXPR (TOK_FUNCTIONSTAR count) cnt)) (TOK_GROUPBY (. 
(TOK_TABLE_OR_COL tmp1) key)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL cnt))))) + +STAGE DEPENDENCIES: + Stage-7 is a root stage + Stage-10 depends on stages: Stage-2, Stage-7 , consists of Stage-12, Stage-13, Stage-3 + Stage-12 has a backup stage: Stage-3 + Stage-8 depends on stages: Stage-12 + Stage-4 depends on stages: Stage-3, Stage-8, Stage-9 + Stage-5 depends on stages: Stage-4 + Stage-13 has a backup stage: Stage-3 + Stage-9 depends on stages: Stage-13 + Stage-3 + Stage-14 is a root stage + Stage-2 depends on stages: Stage-14 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-7 + Map Reduce + Alias -> Map Operator Tree: + tmp1:x1 + TableScan + alias: x1 + Select Operator + expressions: + expr: key + type: string + outputColumnNames: key + Group By Operator + bucketGroup: false + keys: + expr: key + type: string + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-10 + Conditional Operator + + Stage: Stage-12 + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME + HashTable Sink Operator + condition expressions: + 0 {_col0} + 1 + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[_col0]] + Position of Big Table: 0 + + Stage: Stage-8 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME1 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[_col0]] + outputColumnNames: _col0 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Local Work: + Map Reduce Local Work + + Stage: Stage-4 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-5 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + sort order: ++ + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-13 + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME1 + HashTable Sink Operator + condition expressions: + 0 {_col0} + 1 + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[_col0]] + Position of Big Table: 1 + + Stage: Stage-9 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[_col0]] + outputColumnNames: _col0 + Position of Big Table: 1 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Local Work: + Map Reduce Local Work + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 1 + $INTNAME1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 0 + value expressions: + expr: _col0 + type: string + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 + handleSkewJoin: false + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-14 + Map Reduce Local Work + Alias -> Map Local Tables: + tmp2:y2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + tmp2:y2 + TableScan + alias: y2 + HashTable Sink Operator + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + Position of Big Table: 0 + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + tmp2:x2 + TableScan + alias: x2 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + 
Group By Operator + bucketGroup: false + keys: + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT tmp1.key as key, count(*) as cnt +FROM (SELECT x1.key AS key + FROM src1 x1 + GROUP BY x1.key) tmp1 +JOIN (SELECT x2.key AS key + FROM src x2 JOIN src1 y2 ON (x2.key = y2.key) + GROUP BY x2.key) tmp2 +ON (tmp1.key = tmp2.key) +GROUP BY tmp1.key +ORDER BY key, cnt +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT tmp1.key as key, count(*) as cnt +FROM (SELECT x1.key AS key + FROM src1 x1 + GROUP BY x1.key) tmp1 +JOIN (SELECT x2.key AS key + FROM src x2 JOIN src1 y2 ON (x2.key = y2.key) + GROUP BY x2.key) tmp2 +ON (tmp1.key = tmp2.key) +GROUP BY tmp1.key +ORDER BY key, cnt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +128 1 +146 1 +150 1 +213 1 +224 1 +238 1 +255 1 +273 1 +278 1 +311 1 +369 1 +401 1 +406 1 +66 1 +98 1 +PREHOOK: query: -- When Correlation Optimizer is enabled, +-- we will use two jobs. The first MR job will evaluate sub-queries of tmp1, tmp2, +-- the Join of tmp1 and tmp2, and the aggregation on the result of the Join of +-- tmp1 and tmp2. The second job will do the ORDER BY. +EXPLAIN +SELECT tmp1.key as key, count(*) as cnt +FROM (SELECT x1.key AS key + FROM src1 x1 + GROUP BY x1.key) tmp1 +JOIN (SELECT x2.key AS key + FROM src x2 JOIN src1 y2 ON (x2.key = y2.key) + GROUP BY x2.key) tmp2 +ON (tmp1.key = tmp2.key) +GROUP BY tmp1.key +ORDER BY key, cnt +PREHOOK: type: QUERY +POSTHOOK: query: -- When Correlation Optimizer is enabled, +-- we will use two jobs. The first MR job will evaluate sub-queries of tmp1, tmp2, +-- the Join of tmp1 and tmp2, and the aggregation on the result of the Join of +-- tmp1 and tmp2. The second job will do the ORDER BY. +EXPLAIN +SELECT tmp1.key as key, count(*) as cnt +FROM (SELECT x1.key AS key + FROM src1 x1 + GROUP BY x1.key) tmp1 +JOIN (SELECT x2.key AS key + FROM src x2 JOIN src1 y2 ON (x2.key = y2.key) + GROUP BY x2.key) tmp2 +ON (tmp1.key = tmp2.key) +GROUP BY tmp1.key +ORDER BY key, cnt +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) x1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) key) key)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x1) key)))) tmp1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x2) (TOK_TABREF (TOK_TABNAME src1) y2) (= (. (TOK_TABLE_OR_COL x2) key) (. (TOK_TABLE_OR_COL y2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x2) key) key)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x2) key)))) tmp2) (= (.
(TOK_TABLE_OR_COL tmp1) key) (. (TOK_TABLE_OR_COL tmp2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmp1) key) key) (TOK_SELEXPR (TOK_FUNCTIONSTAR count) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL tmp1) key)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL cnt))))) + +STAGE DEPENDENCIES: + Stage-7 is a root stage + Stage-2 depends on stages: Stage-7 + Stage-3 depends on stages: Stage-2 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + tmp2:y2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + tmp2:y2 + TableScan + alias: y2 + HashTable Sink Operator + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + Position of Big Table: 0 + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + tmp1:x1 + TableScan + alias: x1 + Select Operator + expressions: + expr: key + type: string + outputColumnNames: key + Group By Operator + bucketGroup: false + keys: + expr: key + type: string + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 0 + tmp2:x2 + TableScan + alias: x2 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Group By Operator + bucketGroup: false + keys: + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 1 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Demux Operator + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Mux Operator + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 + handleSkewJoin: false + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Mux Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: + expr: _col0 + type: string + mode: complete + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Mux Operator + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 + handleSkewJoin: false + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Mux Operator + Group By Operator + aggregations: + expr: 
count() + bucketGroup: false + keys: + expr: _col0 + type: string + mode: complete + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + sort order: ++ + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT tmp1.key as key, count(*) as cnt +FROM (SELECT x1.key AS key + FROM src1 x1 + GROUP BY x1.key) tmp1 +JOIN (SELECT x2.key AS key + FROM src x2 JOIN src1 y2 ON (x2.key = y2.key) + GROUP BY x2.key) tmp2 +ON (tmp1.key = tmp2.key) +GROUP BY tmp1.key +ORDER BY key, cnt +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT tmp1.key as key, count(*) as cnt +FROM (SELECT x1.key AS key + FROM src1 x1 + GROUP BY x1.key) tmp1 +JOIN (SELECT x2.key AS key + FROM src x2 JOIN src1 y2 ON (x2.key = y2.key) + GROUP BY x2.key) tmp2 +ON (tmp1.key = tmp2.key) +GROUP BY tmp1.key +ORDER BY key, cnt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +128 1 +146 1 +150 1 +213 1 +224 1 +238 1 +255 1 +273 1 +278 1 +311 1 +369 1 +401 1 +406 1 +66 1 +98 1 +PREHOOK: query: -- Check if we can correctly handle a partitioned table. +CREATE TABLE part_table(key string, value string) PARTITIONED BY (partitionId int) +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- Check if we can correctly handle a partitioned table.
+CREATE TABLE part_table(key string, value string) PARTITIONED BY (partitionId int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@part_table +PREHOOK: query: INSERT OVERWRITE TABLE part_table PARTITION (partitionId=1) + SELECT key, value FROM src ORDER BY key, value LIMIT 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@part_table@partitionid=1 +POSTHOOK: query: INSERT OVERWRITE TABLE part_table PARTITION (partitionId=1) + SELECT key, value FROM src ORDER BY key, value LIMIT 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@part_table@partitionid=1 +POSTHOOK: Lineage: part_table PARTITION(partitionid=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: part_table PARTITION(partitionid=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: INSERT OVERWRITE TABLE part_table PARTITION (partitionId=2) + SELECT key, value FROM src1 ORDER BY key, value +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +PREHOOK: Output: default@part_table@partitionid=2 +POSTHOOK: query: INSERT OVERWRITE TABLE part_table PARTITION (partitionId=2) + SELECT key, value FROM src1 ORDER BY key, value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@part_table@partitionid=2 +POSTHOOK: Lineage: part_table PARTITION(partitionid=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: part_table PARTITION(partitionid=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: part_table PARTITION(partitionid=2).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: part_table PARTITION(partitionid=2).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: EXPLAIN +SELECT count(*) +FROM part_table x JOIN src1 y ON (x.key = y.key) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT count(*) +FROM part_table x JOIN src1 y ON (x.key = y.key) +POSTHOOK: type: QUERY +POSTHOOK: Lineage: part_table PARTITION(partitionid=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: part_table PARTITION(partitionid=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: part_table PARTITION(partitionid=2).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: part_table PARTITION(partitionid=2).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME part_table) x) (TOK_TABREF (TOK_TABNAME src1) y) (= (. (TOK_TABLE_OR_COL x) key) (. 
(TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +STAGE DEPENDENCIES: + Stage-5 is a root stage + Stage-2 depends on stages: Stage-5 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-5 + Map Reduce Local Work + Alias -> Map Local Tables: + y + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + y + TableScan + alias: y + HashTable Sink Operator + condition expressions: + 0 + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + Position of Big Table: 0 + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + x + TableScan + alias: x + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + Position of Big Table: 0 + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: bigint + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT count(*) +FROM part_table x JOIN src1 y ON (x.key = y.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@part_table +PREHOOK: Input: default@part_table@partitionid=1 +PREHOOK: Input: default@part_table@partitionid=2 +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT count(*) +FROM part_table x JOIN src1 y ON (x.key = y.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part_table +POSTHOOK: Input: default@part_table@partitionid=1 +POSTHOOK: Input: default@part_table@partitionid=2 +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: Lineage: part_table PARTITION(partitionid=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: part_table PARTITION(partitionid=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: part_table PARTITION(partitionid=2).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: part_table PARTITION(partitionid=2).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] +121 diff --git ql/src/test/results/clientpositive/multi_join_union.q.out ql/src/test/results/clientpositive/multi_join_union.q.out index 5182bdf..2b54601 100644 --- ql/src/test/results/clientpositive/multi_join_union.q.out +++ ql/src/test/results/clientpositive/multi_join_union.q.out @@ -41,7 +41,7 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-8 is a root stage - Stage-7 depends on stages: Stage-8 + Stage-6 depends on stages: Stage-8 Stage-0 is a root stage STAGE PLANS: @@ -125,7 +125,7 @@ STAGE PLANS: 1 [Column[_col1]] Position of Big Table: 0 - Stage: Stage-7 + Stage: Stage-6 Map Reduce Alias -> Map Operator Tree: b diff --git ql/src/test/results/clientpositive/union34.q.out ql/src/test/results/clientpositive/union34.q.out index 166062a..f842813 
100644 --- ql/src/test/results/clientpositive/union34.q.out +++ ql/src/test/results/clientpositive/union34.q.out @@ -1,22 +1,6 @@ -PREHOOK: query: -- HIVE-4342 --- Maponly union(UNION-13) is merged into non-maponly union(UNION-15) --- In this case, task for UNION-13 should be removed from top-task and merged into task for UNION-15 --- TS[2]-SEL[3]-RS[5]-JOIN[6]-SEL[7]-UNION[15]-SEL[16]-RS[17]-EX[18]-FS[19] --- TS[0]-SEL[1]-RS[4]-JOIN[6] --- TS[8]-SEL[9]-UNION[13]-SEL[14]-UNION[15] --- TS[11]-SEL[12]-UNION[13] - -create table src10_1 (key string, value string) +PREHOOK: query: create table src10_1 (key string, value string) PREHOOK: type: CREATETABLE -POSTHOOK: query: -- HIVE-4342 --- Maponly union(UNION-13) is merged into non-maponly union(UNION-15) --- In this case, task for UNION-13 should be removed from top-task and merged into task for UNION-15 --- TS[2]-SEL[3]-RS[5]-JOIN[6]-SEL[7]-UNION[15]-SEL[16]-RS[17]-EX[18]-FS[19] --- TS[0]-SEL[1]-RS[4]-JOIN[6] --- TS[8]-SEL[9]-UNION[13]-SEL[14]-UNION[15] --- TS[11]-SEL[12]-UNION[13] - -create table src10_1 (key string, value string) +POSTHOOK: query: create table src10_1 (key string, value string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@src10_1 PREHOOK: query: create table src10_2 (key string, value string) @@ -64,14 +48,18 @@ POSTHOOK: Lineage: src10_3.key SIMPLE [(src)src.FieldSchema(name:key, type:strin POSTHOOK: Lineage: src10_3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src10_4.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: src10_4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: explain +PREHOOK: query: -- When we convert the Join of sub1 and sub0 into a MapJoin, +-- we can use a single MR job to evaluate this entire query. +explain SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION ALL SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION ALL SELECT * FROM src10_4 ) alias0 ) alias1 order by key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- When we convert the Join of sub1 and sub0 into a MapJoin, +-- we can use a single MR job to evaluate this entire query. 
+explain SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION ALL @@ -91,8 +79,7 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-7 is a root stage - Stage-6 depends on stages: Stage-7 - Stage-2 depends on stages: Stage-6 + Stage-2 depends on stages: Stage-7 Stage-0 is a root stage STAGE PLANS: @@ -123,7 +110,7 @@ STAGE PLANS: 1 [Column[_col0]] Position of Big Table: 1 - Stage: Stage-6 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: null-subquery1:alias1-subquery1:sub0:src10_2 @@ -153,39 +140,25 @@ STAGE PLANS: expr: _col1 type: string outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Local Work: - Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - TableScan - Union - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: - expr: _col0 - type: string - sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Union + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string null-subquery2:alias1-subquery2-subquery1:alias0-subquery1:sub2:src10_3 TableScan alias: src10_3 @@ -260,6 +233,8 @@ STAGE PLANS: type: string expr: _col1 type: string + Local Work: + Map Reduce Local Work Reduce Operator Tree: Extract File Output Operator @@ -334,14 +309,22 @@ POSTHOOK: Lineage: src10_4.value SIMPLE [(src)src.FieldSchema(name:value, type:s 98 val_98 98 val_98 98 val_98 -PREHOOK: query: explain +PREHOOK: query: -- When we do not convert the Join of sub1 and sub0 into a MapJoin, +-- we need to use two MR jobs to evaluate this query. +-- The first job is for the Join of sub1 and sub2. The second job +-- is for the UNION ALL and ORDER BY. +explain SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION ALL SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION ALL SELECT * FROM src10_4 ) alias0 ) alias1 order by key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- When we do not convert the Join of sub1 and sub0 into a MapJoin, +-- we need to use two MR jobs to evaluate this query. +-- The first job is for the Join of sub1 and sub2. The second job +-- is for the UNION ALL and ORDER BY. +explain SELECT * FROM ( SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key) UNION ALL