diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java
index 66b84ff..42b10a8 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java
@@ -1,9 +1,4 @@
 /**
-<<<<<<< HEAD
-=======
- * Copyright 2010 The Apache Software Foundation
- *
->>>>>>> HIVE-1402 [jira] Add parallel ORDER BY to Hive
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -30,30 +25,30 @@ public class OperatorUtils {

   public static <T> Set<T> findOperators(Operator<?> start, Class<T> clazz) {
-    return findOperator(start, clazz, new HashSet<T>());
+    return findOperators(start, clazz, new HashSet<T>());
   }

   public static <T> T findSingleOperator(Operator<?> start, Class<T> clazz) {
-    Set<T> found = findOperator(start, clazz, new HashSet<T>());
+    Set<T> found = findOperators(start, clazz, new HashSet<T>());
     return found.size() == 1 ? found.iterator().next() : null;
   }

   public static <T> Set<T> findOperators(Collection<Operator<?>> starts, Class<T> clazz) {
     Set<T> found = new HashSet<T>();
     for (Operator<?> start : starts) {
-      findOperator(start, clazz, found);
+      findOperators(start, clazz, found);
     }
     return found;
   }

   @SuppressWarnings("unchecked")
-  private static <T> Set<T> findOperator(Operator<?> start, Class<T> clazz, Set<T> found) {
+  private static <T> Set<T> findOperators(Operator<?> start, Class<T> clazz, Set<T> found) {
     if (clazz.isInstance(start)) {
       found.add((T) start);
     }
     if (start.getChildOperators() != null) {
       for (Operator<?> child : start.getChildOperators()) {
-        findOperator(child, clazz, found);
+        findOperators(child, clazz, found);
       }
     }
     return found;
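For readers outside the Hive code base, the helper being renamed above is a plain depth-first collection over an operator tree. A minimal, self-contained sketch of the same pattern; Node and LeafNode below are illustrative stand-ins for Hive's Operator hierarchy, not types from this patch:

    import java.util.ArrayList;
    import java.util.HashSet;
    import java.util.List;
    import java.util.Set;

    // Toy tree node standing in for Hive's Operator; illustrative only.
    class Node {
      final List<Node> children = new ArrayList<Node>();
    }

    class LeafNode extends Node {
    }

    public class FindOperatorsSketch {
      // Same shape as OperatorUtils.findOperators: depth-first walk that
      // collects every node assignable to clazz into the shared set.
      static <T> Set<T> findNodes(Node start, Class<T> clazz, Set<T> found) {
        if (clazz.isInstance(start)) {
          found.add(clazz.cast(start));
        }
        for (Node child : start.children) {
          findNodes(child, clazz, found);
        }
        return found;
      }

      public static void main(String[] args) {
        Node root = new Node();
        Node mid = new Node();
        mid.children.add(new LeafNode());
        root.children.add(mid);
        root.children.add(new LeafNode());
        // Prints 2: both leaves are collected, regardless of depth.
        System.out.println(findNodes(root, LeafNode.class, new HashSet<LeafNode>()).size());
      }
    }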
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java
index f98878c..39a6d04 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java
@@ -26,6 +26,7 @@
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
+import java.util.Map.Entry;

 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.common.ObjectPair;
@@ -35,6 +36,7 @@
 import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
 import org.apache.hadoop.hive.ql.exec.JoinOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.OperatorUtils;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.exec.Task;
 import org.apache.hadoop.hive.ql.exec.TaskFactory;
@@ -290,130 +292,141 @@ private void copyReducerConf(MapRedTask task, MapRedTask childTask) {
    * A task and its child task has been converted from join to mapjoin.
    * See if the two tasks can be merged.
    */
-  private void mergeMapJoinTaskWithMapReduceTask(MapRedTask mapJoinTask, Configuration conf) {
+  private void mergeMapJoinTaskWithMapReduceTask(MapRedTask mapJoinTask, Configuration conf)
+      throws SemanticException {
+    // Step 1: Check if mapJoinTask has a single child.
+    // If so, check if we can merge mapJoinTask into that child.
     if (mapJoinTask.getChildTasks() == null
         || mapJoinTask.getChildTasks().size() > 1) {
       // No child-task to merge, nothing to do or there are more than one
       // child-tasks in which case we don't want to do anything.
       return;
     }
-    Task<? extends Serializable> firstChildTask = mapJoinTask.getChildTasks().get(0);
-    if (!(firstChildTask instanceof MapRedTask)) {
-      // Nothing to do if it is not a mapreduce task.
-      return;
-    }
-    MapRedTask childTask = (MapRedTask) firstChildTask;
-    MapredWork mapJoinWork = mapJoinTask.getWork();
-    MapredWork childWork = childTask.getWork();
-    Operator<? extends OperatorDesc> childReducer = childWork.getReducer();
-    if (childReducer == null) {
-      // Not a MR job, nothing to merge.
-      return;
-    }
-    // Can this be merged
-    Map<String, Operator<? extends OperatorDesc>> aliasToWork = mapJoinWork.getAliasToWork();
-    if (aliasToWork.size() > 1) {
-      return;
-    }
-    Map<String, ArrayList<String>> childPathToAliases = childWork.getPathToAliases();
-    if (childPathToAliases.size() > 1) {
+    Task<? extends Serializable> childTask = mapJoinTask.getChildTasks().get(0);
+    if (!(childTask instanceof MapRedTask)) {
+      // Nothing to do if it is not a MapReduce task.
       return;
     }
-    // Locate leaf operator of the map-join task. Start by initializing leaf
-    // operator to be root operator.
-    Operator<? extends OperatorDesc> mapJoinLeafOperator = aliasToWork.values().iterator().next();
-    while (mapJoinLeafOperator.getChildOperators() != null) {
-      // Dont perform this optimization for multi-table inserts
-      if (mapJoinLeafOperator.getChildOperators().size() > 1) {
-        return;
-      }
-      mapJoinLeafOperator = mapJoinLeafOperator.getChildOperators().get(0);
-    }
+    MapRedTask childMRTask = (MapRedTask) childTask;
+    MapredWork mapJoinWork = mapJoinTask.getWork();
+    MapredWork childMRWork = childMRTask.getWork();

-    assert (mapJoinLeafOperator instanceof FileSinkOperator);
-    if (!(mapJoinLeafOperator instanceof FileSinkOperator)) {
-      // Sanity check, shouldn't happen.
+    Map<String, ArrayList<String>> mapJoinPathToAliases = mapJoinWork.getPathToAliases();
+    Map<String, Operator<? extends OperatorDesc>> mapJoinAliasToWork =
+        mapJoinWork.getAliasToWork();
+    if (mapJoinPathToAliases.size() > 1 || mapJoinAliasToWork.size() > 1) {
+      // Do not merge if the MapredWork of MapJoin has multiple input aliases.
       return;
     }
-    FileSinkOperator mapJoinTaskFileSinkOperator = (FileSinkOperator) mapJoinLeafOperator;
-
-    // The filesink writes to a different directory
-    String workDir = mapJoinTaskFileSinkOperator.getConf().getDirName();
-    if (!childPathToAliases.keySet().iterator().next().equals(workDir)) {
+    Entry<String, ArrayList<String>> mapJoinPathToAliasesEntry =
+        mapJoinPathToAliases.entrySet().iterator().next();
+    String mapJoinPath = mapJoinPathToAliasesEntry.getKey();
+    if (mapJoinPathToAliasesEntry.getValue().size() != 1) {
+      throw new SemanticException("Expected only 1 alias assigned to path " + mapJoinPath
+          + ". Found " + mapJoinPathToAliasesEntry.getValue().size() + " aliases which are "
+          + mapJoinPathToAliasesEntry.getValue().toString());
+    }
+    String mapJoinAlias = mapJoinPathToAliasesEntry.getValue().get(0);
+    TableScanOperator mapJoinTaskTableScanOperator =
+        OperatorUtils.findSingleOperator(
+            mapJoinAliasToWork.get(mapJoinAlias), TableScanOperator.class);
+    if (mapJoinTaskTableScanOperator == null) {
+      throw new SemanticException("Expected a " + TableScanOperator.getOperatorName()
+          + " operator as the work associated with alias " + mapJoinAlias
+          + ". Found a " + mapJoinAliasToWork.get(mapJoinAlias).getName() + " operator.");
+    }
Found a " + mapJoinAliasToWork.get(mapJoinAlias).getName() + " operator."); + } + FileSinkOperator mapJoinTaskFileSinkOperator = + OperatorUtils.findSingleOperator( + mapJoinTaskTableScanOperator, FileSinkOperator.class); + if (mapJoinTaskTableScanOperator == null || mapJoinTaskFileSinkOperator == null) { + throw new SemanticException("Cannot find the " + FileSinkOperator.getOperatorName() + + " operator at the last operator of the MapJoin Task."); + } + + // The mapJoinTaskFileSinkOperator writes to a different directory + String childMRPath = mapJoinTaskFileSinkOperator.getConf().getDirName(); + List childMRAliases = childMRWork.getPathToAliases().get(childMRPath); + if (childMRAliases == null || childMRAliases.size() != 1) { return; } + String childMRAlias = childMRAliases.get(0); MapredLocalWork mapJoinLocalWork = mapJoinWork.getMapLocalWork(); - MapredLocalWork childLocalWork = childWork.getMapLocalWork(); - - // Either of them should not be bucketed + MapredLocalWork childLocalWork = childMRWork.getMapLocalWork(); if ((mapJoinLocalWork != null && mapJoinLocalWork.getBucketMapjoinContext() != null) || (childLocalWork != null && childLocalWork.getBucketMapjoinContext() != null)) { + // Do not merge if either of them is bucketed. return; } - - if (childWork.getAliasToWork().size() > 1) { - return; - } - - Operator childAliasOp = - childWork.getAliasToWork().values().iterator().next(); - if (mapJoinTaskFileSinkOperator.getParentOperators().size() > 1) { + TableScanOperator childMRTaskTableScanOperator = + OperatorUtils.findSingleOperator( + childMRWork.getAliasToWork().get(childMRAlias), TableScanOperator.class); + if (childMRTaskTableScanOperator == null) { + throw new SemanticException("Expected a " + TableScanOperator.getOperatorName() + + " operator as the work associated with alias " + childMRAlias + + ". Found a " + childMRWork.getAliasToWork().get(childMRAlias).getName() + " operator."); + } + + List> parentsInMapJoinTask = + mapJoinTaskFileSinkOperator.getParentOperators(); + List> childrenInChildMRTask = + childMRTaskTableScanOperator.getChildOperators(); + if (parentsInMapJoinTask.size() > 1 || childrenInChildMRTask.size() > 1) { + // Do not merge if we do not know how to connect two operator trees. return; } - // remove the unnecessary TableScan - if (childAliasOp instanceof TableScanOperator) { - TableScanOperator tso = (TableScanOperator)childAliasOp; - if (tso.getNumChild() != 1) { - // shouldn't happen - return; + // Step 2: Merge mapJoinTask into the Map-side of its child. + // Step 2.1: Connect the operator trees of two MapRedTasks. + Operator parentInMapJoinTask = parentsInMapJoinTask.get(0); + Operator childInChildMRTask = childrenInChildMRTask.get(0); + parentInMapJoinTask.replaceChild(mapJoinTaskFileSinkOperator, childInChildMRTask); + childInChildMRTask.replaceParent(childMRTaskTableScanOperator, parentInMapJoinTask); + + // Step 2.2: Replace the corresponding part childMRWork's MapWork. 
+    childMRWork.replaceMapWork(childMRPath, childMRAlias,
+        mapJoinPath, mapJoinAlias, mapJoinTaskTableScanOperator,
+        mapJoinWork.getPathToPartitionInfo().get(mapJoinPath));
+
+    // Step 2.3: Merge the MapJoin task's local work into the child's local work.
+    if (mapJoinLocalWork != null) {
+      if (childLocalWork == null) {
+        childMRWork.setMapLocalWork(mapJoinLocalWork);
+      } else {
+        childLocalWork.getAliasToFetchWork().putAll(mapJoinLocalWork.getAliasToFetchWork());
+        childLocalWork.getAliasToWork().putAll(mapJoinLocalWork.getAliasToWork());
       }
-      childAliasOp = tso.getChildOperators().get(0);
-      childAliasOp.getParentOperators().remove(tso);
     }

-    // Merge the 2 trees - remove the FileSinkOperator from the first tree pass it to the
-    // top of the second
-    Operator<? extends OperatorDesc> parentFOp = mapJoinTaskFileSinkOperator
-        .getParentOperators().get(0);
-    parentFOp.getChildOperators().remove(mapJoinTaskFileSinkOperator);
-    parentFOp.getChildOperators().add(childAliasOp);
-    List<Operator<? extends OperatorDesc>> parentOps =
-        new ArrayList<Operator<? extends OperatorDesc>>();
-    parentOps.add(parentFOp);
-    childAliasOp.setParentOperators(parentOps);
-
-    mapJoinWork.getAliasToPartnInfo().putAll(childWork.getAliasToPartnInfo());
-    for (Map.Entry<String, PartitionDesc> childWorkEntry : childWork.getPathToPartitionInfo()
-        .entrySet()) {
-      if (childWork.getAliasToPartnInfo().containsValue(childWorkEntry.getKey())) {
-        mapJoinWork.getPathToPartitionInfo()
-            .put(childWorkEntry.getKey(), childWorkEntry.getValue());
+    // Step 2.4: Remove this MapJoin task
+    List<Task<? extends Serializable>> parentTasks = mapJoinTask.getParentTasks();
+    mapJoinTask.setParentTasks(null);
+    mapJoinTask.setChildTasks(null);
+    childMRTask.getParentTasks().remove(mapJoinTask);
+    if (parentTasks != null) {
+      childMRTask.getParentTasks().addAll(parentTasks);
+      for (Task<? extends Serializable> parentTask : parentTasks) {
+        parentTask.getChildTasks().remove(mapJoinTask);
+        if (!parentTask.getChildTasks().contains(childMRTask)) {
+          parentTask.getChildTasks().add(childMRTask);
+        }
       }
-    }
-
-    // Fill up stuff in local work
-    if (mapJoinLocalWork != null && childLocalWork != null) {
-      mapJoinLocalWork.getAliasToFetchWork().putAll(childLocalWork.getAliasToFetchWork());
-      mapJoinLocalWork.getAliasToWork().putAll(childLocalWork.getAliasToWork());
-    }
-
-    // remove the child task
-    List<Task<? extends Serializable>> oldChildTasks = childTask.getChildTasks();
-    mapJoinTask.setChildTasks(oldChildTasks);
-    if (oldChildTasks != null) {
-      for (Task<? extends Serializable> oldChildTask : oldChildTasks) {
-        oldChildTask.getParentTasks().remove(childTask);
-        oldChildTask.getParentTasks().add(mapJoinTask);
+    } else {
+      if (physicalContext.getRootTasks().contains(mapJoinTask)) {
+        physicalContext.removeFromRootTask(mapJoinTask);
+        if (childMRTask.getParentTasks() != null &&
+            childMRTask.getParentTasks().size() == 0 &&
+            !physicalContext.getRootTasks().contains(childMRTask)) {
+          physicalContext.addToRootTask(childMRTask);
+        }
       }
     }
-
-    // Copy the reducer conf.
-    copyReducerConf(mapJoinTask, childTask);
+    if (childMRTask.getParentTasks().size() == 0) {
+      childMRTask.setParentTasks(null);
+    }
   }

   public static boolean cannotConvert(String bigTableAlias,
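To make the Step 2.1 rewiring above concrete: the patch drops the MapJoin task's FileSinkOperator and the child task's TableScanOperator, then splices the surviving operators together. A minimal sketch under the assumption that replaceChild/replaceParent do an in-place swap, as Hive's Operator methods do; the Op class and the names below are illustrative, not part of the patch:

    import java.util.ArrayList;
    import java.util.List;

    // Toy operator with parent/child links; a stand-in for Hive's Operator,
    // used only to illustrate the Step 2.1 reconnection.
    class Op {
      final String name;
      final List<Op> parents = new ArrayList<Op>();
      final List<Op> children = new ArrayList<Op>();
      Op(String name) { this.name = name; }

      // Swap oldChild for newChild in place, like Operator.replaceChild.
      void replaceChild(Op oldChild, Op newChild) {
        children.set(children.indexOf(oldChild), newChild);
      }

      // Swap oldParent for newParent in place, like Operator.replaceParent.
      void replaceParent(Op oldParent, Op newParent) {
        parents.set(parents.indexOf(oldParent), newParent);
      }
    }

    public class MergeSketch {
      public static void main(String[] args) {
        // MapJoin task tree: ... -> parent -> FileSink
        Op parent = new Op("MAPJOIN");
        Op fileSink = new Op("FS");
        parent.children.add(fileSink);
        fileSink.parents.add(parent);

        // Child MR task tree: TableScan -> firstChild -> ...
        Op tableScan = new Op("TS");
        Op firstChild = new Op("SEL");
        tableScan.children.add(firstChild);
        firstChild.parents.add(tableScan);

        // Step 2.1: drop FS and TS, wiring parent directly to firstChild.
        parent.replaceChild(fileSink, firstChild);
        firstChild.replaceParent(tableScan, parent);

        System.out.println(parent.children.get(0).name);    // SEL
        System.out.println(firstChild.parents.get(0).name); // MAPJOIN
      }
    }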
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java
index 7cbb1ff..3a4cb57 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java
@@ -374,6 +374,46 @@ public void addMapWork(String path, String alias, Operator<?> work,
     aliasToWork.put(alias, work);
   }

+  /** Replace all entries associated with oldAlias in pathToAliases, aliasToWork,
+   * and aliasToPartnInfo with mappings for newAlias under newPath.
+   * At the end, if there is no alias associated with oldPath, we also delete the entries
+   * associated with oldPath in pathToAliases and pathToPartitionInfo.
+   * @param oldPath
+   * @param oldAlias
+   * @param newPath
+   * @param newAlias
+   * @param newWork
+   * @param newPd
+   */
+  public void replaceMapWork(String oldPath, String oldAlias,
+      String newPath, String newAlias, Operator<?> newWork, PartitionDesc newPd) {
+    if (!pathToAliases.containsKey(oldPath)) {
+      // If oldPath is not in pathToAliases, there is no entry we need to delete from
+      // pathToAliases, pathToPartitionInfo, aliasToWork, and aliasToPartnInfo.
+      return;
+    }
+
+    // Remove old entries.
+    pathToAliases.get(oldPath).remove(oldAlias);
+    aliasToWork.remove(oldAlias);
+    aliasToPartnInfo.remove(oldAlias);
+    if (pathToAliases.get(oldPath).size() == 0) {
+      // oldPath will not be used. So, we can delete entries associated with oldPath
+      // from pathToAliases and pathToPartitionInfo.
+      pathToAliases.remove(oldPath);
+      pathToPartitionInfo.remove(oldPath);
+    }
+
+    // Add new entries.
+    if (!pathToAliases.containsKey(newPath)) {
+      // It is the first time we see newPath in this MapWork.
+      pathToAliases.put(newPath, new ArrayList<String>());
+      pathToPartitionInfo.put(newPath, newPd);
+    }
+    pathToAliases.get(newPath).add(newAlias);
+    aliasToWork.put(newAlias, newWork);
+    aliasToPartnInfo.put(newAlias, pathToPartitionInfo.get(newPath));
+  }
+
   @SuppressWarnings("nls")
   public String isInvalid() {
     if ((getNumReduceTasks() >= 1) && (getReducer() == null)) {
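replaceMapWork is plain bookkeeping over the four alias/path tables. A self-contained sketch of its before/after effect, with String standing in for both the operator tree and PartitionDesc; all names and paths below are made up for illustration:

    import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.Map;

    public class ReplaceMapWorkSketch {
      // String stands in for the operator tree (aliasToWork) and for
      // PartitionDesc (pathToPartitionInfo, aliasToPartnInfo).
      static Map<String, ArrayList<String>> pathToAliases = new HashMap<String, ArrayList<String>>();
      static Map<String, String> aliasToWork = new HashMap<String, String>();
      static Map<String, String> pathToPartitionInfo = new HashMap<String, String>();
      static Map<String, String> aliasToPartnInfo = new HashMap<String, String>();

      // Same bookkeeping as MapredWork.replaceMapWork in the hunk above.
      static void replaceMapWork(String oldPath, String oldAlias,
          String newPath, String newAlias, String newWork, String newPd) {
        if (!pathToAliases.containsKey(oldPath)) {
          return; // nothing to delete
        }
        pathToAliases.get(oldPath).remove(oldAlias);
        aliasToWork.remove(oldAlias);
        aliasToPartnInfo.remove(oldAlias);
        if (pathToAliases.get(oldPath).isEmpty()) {
          pathToAliases.remove(oldPath);
          pathToPartitionInfo.remove(oldPath);
        }
        if (!pathToAliases.containsKey(newPath)) {
          pathToAliases.put(newPath, new ArrayList<String>());
          pathToPartitionInfo.put(newPath, newPd);
        }
        pathToAliases.get(newPath).add(newAlias);
        aliasToWork.put(newAlias, newWork);
        aliasToPartnInfo.put(newAlias, pathToPartitionInfo.get(newPath));
      }

      public static void main(String[] args) {
        // Child MR task initially reads the MapJoin task's temporary output dir.
        ArrayList<String> aliases = new ArrayList<String>();
        aliases.add("childAlias");
        pathToAliases.put("/tmp/mapjoin-out", aliases);
        aliasToWork.put("childAlias", "childOperatorTree");
        pathToPartitionInfo.put("/tmp/mapjoin-out", "tmpPartDesc");
        aliasToPartnInfo.put("childAlias", "tmpPartDesc");

        // After merging, it reads the MapJoin task's original input instead.
        replaceMapWork("/tmp/mapjoin-out", "childAlias",
            "/warehouse/src", "x2", "mapJoinOperatorTree", "srcPartDesc");

        System.out.println(pathToAliases);       // {/warehouse/src=[x2]}
        System.out.println(pathToPartitionInfo); // {/warehouse/src=srcPartDesc}
      }
    }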
diff --git ql/src/test/queries/clientpositive/multiMapJoin2.q ql/src/test/queries/clientpositive/multiMapJoin2.q
new file mode 100644
index 0000000..6640549
--- /dev/null
+++ ql/src/test/queries/clientpositive/multiMapJoin2.q
@@ -0,0 +1,195 @@
+set hive.auto.convert.join=true;
+set hive.auto.convert.join.noconditionaltask=true;
+
+set hive.optimize.mapjoin.mapreduce=false;
+-- When hive.optimize.mapjoin.mapreduce=false, we will generate two Map-only jobs
+-- and one MR job.
+EXPLAIN
+SELECT tmp.key
+FROM (SELECT x1.key AS key FROM src x1 JOIN src1 y1 ON (x1.key = y1.key)
+      UNION ALL
+      SELECT x2.key AS key FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)) tmp
+ORDER BY tmp.key;
+
+SELECT tmp.key
+FROM (SELECT x1.key AS key FROM src x1 JOIN src1 y1 ON (x1.key = y1.key)
+      UNION ALL
+      SELECT x2.key AS key FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)) tmp
+ORDER BY tmp.key;
+
+set hive.optimize.mapjoin.mapreduce=true;
+-- When hive.optimize.mapjoin.mapreduce=true, we will generate one MR job.
+EXPLAIN
+SELECT tmp.key
+FROM (SELECT x1.key AS key FROM src x1 JOIN src1 y1 ON (x1.key = y1.key)
+      UNION ALL
+      SELECT x2.key AS key FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)) tmp
+ORDER BY tmp.key;
+
+SELECT tmp.key
+FROM (SELECT x1.key AS key FROM src x1 JOIN src1 y1 ON (x1.key = y1.key)
+      UNION ALL
+      SELECT x2.key AS key FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)) tmp
+ORDER BY tmp.key;
+
+set hive.optimize.mapjoin.mapreduce=false;
+-- When hive.optimize.mapjoin.mapreduce=false, we will use totally three jobs.
+-- We will generate one MR job for GROUP BY
+-- on x1, one Map-only job for the MapJoin of x2 and y2, and one MR job
+-- for the UNION ALL and ORDER BY.
+EXPLAIN
+SELECT tmp.key
+FROM (SELECT x1.key AS key FROM src1 x1 GROUP BY x1.key
+      UNION ALL
+      SELECT x2.key AS key FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)) tmp
+ORDER BY tmp.key;
+
+SELECT tmp.key
+FROM (SELECT x1.key AS key FROM src1 x1 GROUP BY x1.key
+      UNION ALL
+      SELECT x2.key AS key FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)) tmp
+ORDER BY tmp.key;
+
+set hive.optimize.mapjoin.mapreduce=true;
+-- When hive.optimize.mapjoin.mapreduce=true, we will use two jobs.
+-- We will generate one MR job for GROUP BY
+-- on x1, one MR job for both the MapJoin of x2 and y2, the UNION ALL, and the
+-- ORDER BY.
+EXPLAIN
+SELECT tmp.key
+FROM (SELECT x1.key AS key FROM src1 x1 GROUP BY x1.key
+      UNION ALL
+      SELECT x2.key AS key FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)) tmp
+ORDER BY tmp.key;
+
+SELECT tmp.key
+FROM (SELECT x1.key AS key FROM src1 x1 GROUP BY x1.key
+      UNION ALL
+      SELECT x2.key AS key FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)) tmp
+ORDER BY tmp.key;
+
+set hive.optimize.mapjoin.mapreduce=false;
+set hive.optimize.correlation=false;
+-- When hive.optimize.mapjoin.mapreduce=false and Correlation Optimizer is disabled,
+-- we will use 7 jobs.
+-- We will generate one Map-only job for the MapJoin of x1 and y1,
+-- one Map-only job for the Map-Join of x2 and y2,
+-- one MR job for the aggregation in the sub-query tmp1,
+-- one MR job for the aggregation in the sub-query tmp2,
+-- one MR job for the Join of tmp1 and tmp2,
+-- one MR job for aggregation on the result of the Join of tmp1 and tmp2,
+-- and one MR job for the ORDER BY.
+EXPLAIN
+SELECT tmp1.key as key, count(*) as cnt
+FROM (SELECT x1.key AS key
+      FROM src x1 JOIN src1 y1 ON (x1.key = y1.key)
+      GROUP BY x1.key) tmp1
+JOIN (SELECT x2.key AS key
+      FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)
+      GROUP BY x2.key) tmp2
+ON (tmp1.key = tmp2.key)
+GROUP BY tmp1.key
+ORDER BY key, cnt;
+
+SELECT tmp1.key as key, count(*) as cnt
+FROM (SELECT x1.key AS key
+      FROM src x1 JOIN src1 y1 ON (x1.key = y1.key)
+      GROUP BY x1.key) tmp1
+JOIN (SELECT x2.key AS key
+      FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)
+      GROUP BY x2.key) tmp2
+ON (tmp1.key = tmp2.key)
+GROUP BY tmp1.key
+ORDER BY key, cnt;
+
+set hive.optimize.mapjoin.mapreduce=true;
+set hive.optimize.correlation=true;
+-- When hive.optimize.mapjoin.mapreduce=true and Correlation Optimizer is enabled,
+-- we will use two jobs. This first MR job will evaluate sub-queries of tmp1, tmp2,
+-- the Join of tmp1 and tmp2, and the aggregation on the result of the Join of
+-- tmp1 and tmp2. The second job will do the ORDER BY.
+EXPLAIN
+SELECT tmp1.key as key, count(*) as cnt
+FROM (SELECT x1.key AS key
+      FROM src x1 JOIN src1 y1 ON (x1.key = y1.key)
+      GROUP BY x1.key) tmp1
+JOIN (SELECT x2.key AS key
+      FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)
+      GROUP BY x2.key) tmp2
+ON (tmp1.key = tmp2.key)
+GROUP BY tmp1.key
+ORDER BY key, cnt;
+
+SELECT tmp1.key as key, count(*) as cnt
+FROM (SELECT x1.key AS key
+      FROM src x1 JOIN src1 y1 ON (x1.key = y1.key)
+      GROUP BY x1.key) tmp1
+JOIN (SELECT x2.key AS key
+      FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)
+      GROUP BY x2.key) tmp2
+ON (tmp1.key = tmp2.key)
+GROUP BY tmp1.key
+ORDER BY key, cnt;
+
+set hive.optimize.mapjoin.mapreduce=false;
+set hive.optimize.correlation=false;
+-- When hive.optimize.mapjoin.mapreduce=false and Correlation Optimizer is disabled,
+-- we will use six jobs.
+-- We will generate one MR job to evaluate the sub-query tmp1,
+-- one Map-only job for the Map-Join of x2 and y2,
+-- one MR job for the aggregation in the sub-query tmp2,
+-- one MR job for the Join of tmp1 and tmp2,
+-- one MR job for aggregation on the result of the Join of tmp1 and tmp2,
+-- and one MR job for the ORDER BY.
+EXPLAIN
+SELECT tmp1.key as key, count(*) as cnt
+FROM (SELECT x1.key AS key
+      FROM src1 x1
+      GROUP BY x1.key) tmp1
+JOIN (SELECT x2.key AS key
+      FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)
+      GROUP BY x2.key) tmp2
+ON (tmp1.key = tmp2.key)
+GROUP BY tmp1.key
+ORDER BY key, cnt;
+
+SELECT tmp1.key as key, count(*) as cnt
+FROM (SELECT x1.key AS key
+      FROM src1 x1
+      GROUP BY x1.key) tmp1
+JOIN (SELECT x2.key AS key
+      FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)
+      GROUP BY x2.key) tmp2
+ON (tmp1.key = tmp2.key)
+GROUP BY tmp1.key
+ORDER BY key, cnt;
+
+set hive.optimize.mapjoin.mapreduce=true;
+set hive.optimize.correlation=true;
+-- When hive.optimize.mapjoin.mapreduce=true and Correlation Optimizer is enabled,
+-- we will use two jobs. This first MR job will evaluate sub-queries of tmp1, tmp2,
+-- the Join of tmp1 and tmp2, and the aggregation on the result of the Join of
+-- tmp1 and tmp2. The second job will do the ORDER BY.
+EXPLAIN
+SELECT tmp1.key as key, count(*) as cnt
+FROM (SELECT x1.key AS key
+      FROM src1 x1
+      GROUP BY x1.key) tmp1
+JOIN (SELECT x2.key AS key
+      FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)
+      GROUP BY x2.key) tmp2
+ON (tmp1.key = tmp2.key)
+GROUP BY tmp1.key
+ORDER BY key, cnt;
+
+SELECT tmp1.key as key, count(*) as cnt
+FROM (SELECT x1.key AS key
+      FROM src1 x1
+      GROUP BY x1.key) tmp1
+JOIN (SELECT x2.key AS key
+      FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)
+      GROUP BY x2.key) tmp2
+ON (tmp1.key = tmp2.key)
+GROUP BY tmp1.key
+ORDER BY key, cnt;
+
diff --git ql/src/test/results/clientpositive/multiMapJoin2.q.out ql/src/test/results/clientpositive/multiMapJoin2.q.out
new file mode 100644
index 0000000..a7012d7
--- /dev/null
+++ ql/src/test/results/clientpositive/multiMapJoin2.q.out
@@ -0,0 +1,2663 @@
+PREHOOK: query: -- When hive.optimize.mapjoin.mapreduce=false, we will generate two Map-only jobs
+-- and one MR job.
+EXPLAIN
+SELECT tmp.key
+FROM (SELECT x1.key AS key FROM src x1 JOIN src1 y1 ON (x1.key = y1.key)
+      UNION ALL
+      SELECT x2.key AS key FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)) tmp
+ORDER BY tmp.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- When hive.optimize.mapjoin.mapreduce=false, we will generate two Map-only jobs
+-- and one MR job.
+EXPLAIN
+SELECT tmp.key
+FROM (SELECT x1.key AS key FROM src x1 JOIN src1 y1 ON (x1.key = y1.key)
+      UNION ALL
+      SELECT x2.key AS key FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)) tmp
+ORDER BY tmp.key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x1) (TOK_TABREF (TOK_TABNAME src1) y1) (= (. (TOK_TABLE_OR_COL x1) key) (. (TOK_TABLE_OR_COL y1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) key) key)))) (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x2) (TOK_TABREF (TOK_TABNAME src1) y2) (= (. (TOK_TABLE_OR_COL x2) key) (. (TOK_TABLE_OR_COL y2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x2) key) key))))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL tmp) key))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL tmp) key))))) + +STAGE DEPENDENCIES: + Stage-8 is a root stage + Stage-6 depends on stages: Stage-8 + Stage-2 depends on stages: Stage-6, Stage-7 + Stage-9 is a root stage + Stage-7 depends on stages: Stage-9 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + null-subquery2:tmp-subquery2:y2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + null-subquery2:tmp-subquery2:y2 + TableScan + alias: y2 + HashTable Sink Operator + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + Position of Big Table: 0 + + Stage: Stage-6 + Map Reduce + Alias -> Map Operator Tree: + null-subquery2:tmp-subquery2:x2 + TableScan + alias: x2 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Local Work: + Map Reduce Local Work + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + TableScan + Union + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: string +#### A masked pattern was here #### + TableScan + Union + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: string + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-9 + Map Reduce Local Work + Alias -> Map Local Tables: + null-subquery1:tmp-subquery1:y1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + null-subquery1:tmp-subquery1:y1 + TableScan + alias: y1 + HashTable Sink Operator + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + Position of Big Table: 0 + + Stage: Stage-7 + Map Reduce + Alias -> Map Operator Tree: + null-subquery1:tmp-subquery1:x1 + TableScan + alias: x1 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT tmp.key +FROM (SELECT x1.key AS key FROM src x1 JOIN src1 y1 ON (x1.key = y1.key) + UNION ALL + SELECT x2.key AS key FROM 
src x2 JOIN src1 y2 ON (x2.key = y2.key)) tmp +ORDER BY tmp.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT tmp.key +FROM (SELECT x1.key AS key FROM src x1 JOIN src1 y1 ON (x1.key = y1.key) + UNION ALL + SELECT x2.key AS key FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)) tmp +ORDER BY tmp.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +128 +128 +128 +128 +128 +128 +146 +146 +146 +146 +150 +150 +213 +213 +213 +213 +224 +224 +224 +224 +238 +238 +238 +238 +255 +255 +255 +255 +273 +273 +273 +273 +273 +273 +278 +278 +278 +278 +311 +311 +311 +311 +311 +311 +369 +369 +369 +369 +369 +369 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +406 +406 +406 +406 +406 +406 +406 +406 +66 +66 +98 +98 +98 +98 +PREHOOK: query: -- When hive.optimize.mapjoin.mapreduce=true, we will generate one MR job. +EXPLAIN +SELECT tmp.key +FROM (SELECT x1.key AS key FROM src x1 JOIN src1 y1 ON (x1.key = y1.key) + UNION ALL + SELECT x2.key AS key FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)) tmp +ORDER BY tmp.key +PREHOOK: type: QUERY +POSTHOOK: query: -- When hive.optimize.mapjoin.mapreduce=true, we will generate one MR job. +EXPLAIN +SELECT tmp.key +FROM (SELECT x1.key AS key FROM src x1 JOIN src1 y1 ON (x1.key = y1.key) + UNION ALL + SELECT x2.key AS key FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)) tmp +ORDER BY tmp.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x1) (TOK_TABREF (TOK_TABNAME src1) y1) (= (. (TOK_TABLE_OR_COL x1) key) (. (TOK_TABLE_OR_COL y1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) key) key)))) (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x2) (TOK_TABREF (TOK_TABNAME src1) y2) (= (. (TOK_TABLE_OR_COL x2) key) (. (TOK_TABLE_OR_COL y2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x2) key) key))))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmp) key))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. 
(TOK_TABLE_OR_COL tmp) key))))) + +STAGE DEPENDENCIES: + Stage-8 is a root stage + Stage-2 depends on stages: Stage-8 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + null-subquery1:tmp-subquery1:y1 + Fetch Operator + limit: -1 + null-subquery2:tmp-subquery2:y2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + null-subquery1:tmp-subquery1:y1 + TableScan + alias: y1 + HashTable Sink Operator + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + Position of Big Table: 0 + null-subquery2:tmp-subquery2:y2 + TableScan + alias: y2 + HashTable Sink Operator + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + Position of Big Table: 0 + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + null-subquery1:tmp-subquery1:x1 + TableScan + alias: x1 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Union + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: string + null-subquery2:tmp-subquery2:x2 + TableScan + alias: x2 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Union + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: string + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT tmp.key +FROM (SELECT x1.key AS key FROM src x1 JOIN src1 y1 ON (x1.key = y1.key) + UNION ALL + SELECT x2.key AS key FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)) tmp +ORDER BY tmp.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT tmp.key +FROM (SELECT x1.key AS key FROM src x1 JOIN src1 y1 ON (x1.key = y1.key) + UNION ALL + SELECT x2.key AS key FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)) tmp +ORDER BY tmp.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +128 +128 +128 +128 +128 +128 +146 +146 +146 +146 +150 +150 +213 +213 +213 +213 +224 +224 +224 +224 +238 +238 +238 +238 +255 +255 +255 +255 +273 +273 +273 +273 +273 +273 +278 +278 +278 +278 +311 +311 +311 +311 +311 +311 +369 +369 +369 +369 +369 +369 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +406 +406 +406 +406 +406 +406 +406 +406 +66 +66 +98 +98 +98 +98 +PREHOOK: query: -- When hive.optimize.mapjoin.mapreduce=false, we will use totally three 
jobs. +-- We will generate one MR job for GROUP BY +-- on x1, one Map-only job for the MapJoin of x2 and y2, and one MR job +-- for the UNION ALL and ORDER BY. +EXPLAIN +SELECT tmp.key +FROM (SELECT x1.key AS key FROM src1 x1 GROUP BY x1.key + UNION ALL + SELECT x2.key AS key FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)) tmp +ORDER BY tmp.key +PREHOOK: type: QUERY +POSTHOOK: query: -- When hive.optimize.mapjoin.mapreduce=false, we will use totally three jobs. +-- We will generate one MR job for GROUP BY +-- on x1, one Map-only job for the MapJoin of x2 and y2, and one MR job +-- for the UNION ALL and ORDER BY. +EXPLAIN +SELECT tmp.key +FROM (SELECT x1.key AS key FROM src1 x1 GROUP BY x1.key + UNION ALL + SELECT x2.key AS key FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)) tmp +ORDER BY tmp.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) x1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) key) key)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x1) key)))) (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x2) (TOK_TABREF (TOK_TABNAME src1) y2) (= (. (TOK_TABLE_OR_COL x2) key) (. (TOK_TABLE_OR_COL y2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x2) key) key))))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmp) key))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL tmp) key))))) + +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-2 depends on stages: Stage-4, Stage-5 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-4 + Map Reduce + Alias -> Map Operator Tree: + null-subquery1:tmp-subquery1:x1 + TableScan + alias: x1 + Select Operator + expressions: + expr: key + type: string + outputColumnNames: key + Group By Operator + bucketGroup: false + keys: + expr: key + type: string + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + TableScan + Union + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: string +#### A masked pattern was here #### + TableScan + Union + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: string + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-6 + Map Reduce Local Work + Alias -> Map Local Tables: + null-subquery2:tmp-subquery2:y2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + null-subquery2:tmp-subquery2:y2 + TableScan + alias: y2 + HashTable Sink Operator + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + Position of Big Table: 0 + + Stage: Stage-5 + Map Reduce + Alias -> Map Operator Tree: + null-subquery2:tmp-subquery2:x2 + TableScan + alias: x2 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT tmp.key +FROM (SELECT x1.key AS key FROM src1 x1 GROUP BY x1.key + UNION ALL + SELECT x2.key AS key FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)) tmp +ORDER BY tmp.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT tmp.key +FROM (SELECT x1.key AS key FROM src1 x1 GROUP BY x1.key + UNION ALL + SELECT x2.key AS key FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)) tmp +ORDER BY tmp.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### + +128 +128 +128 +128 +146 +146 +146 +150 +150 +213 +213 +213 +224 +224 +224 +238 +238 +238 +255 +255 +255 +273 +273 +273 +273 +278 +278 +278 +311 +311 +311 +311 +369 +369 +369 +369 +401 +401 +401 +401 +401 +401 +406 +406 +406 +406 +406 +66 +66 +98 +98 +98 +PREHOOK: query: -- When hive.optimize.mapjoin.mapreduce=true, we will use two jobs. +-- We will generate one MR job for GROUP BY +-- on x1, one MR job for both the MapJoin of x2 and y2, the UNION ALL, and the +-- ORDER BY. +EXPLAIN +SELECT tmp.key +FROM (SELECT x1.key AS key FROM src1 x1 GROUP BY x1.key + UNION ALL + SELECT x2.key AS key FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)) tmp +ORDER BY tmp.key +PREHOOK: type: QUERY +POSTHOOK: query: -- When hive.optimize.mapjoin.mapreduce=true, we will use two jobs. +-- We will generate one MR job for GROUP BY +-- on x1, one MR job for both the MapJoin of x2 and y2, the UNION ALL, and the +-- ORDER BY. +EXPLAIN +SELECT tmp.key +FROM (SELECT x1.key AS key FROM src1 x1 GROUP BY x1.key + UNION ALL + SELECT x2.key AS key FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)) tmp +ORDER BY tmp.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) x1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) key) key)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x1) key)))) (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x2) (TOK_TABREF (TOK_TABNAME src1) y2) (= (. (TOK_TABLE_OR_COL x2) key) (. (TOK_TABLE_OR_COL y2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL x2) key) key))))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmp) key))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL tmp) key))))) + +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-6 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-6 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-4 + Map Reduce + Alias -> Map Operator Tree: + null-subquery1:tmp-subquery1:x1 + TableScan + alias: x1 + Select Operator + expressions: + expr: key + type: string + outputColumnNames: key + Group By Operator + bucketGroup: false + keys: + expr: key + type: string + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-6 + Map Reduce Local Work + Alias -> Map Local Tables: + null-subquery2:tmp-subquery2:y2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + null-subquery2:tmp-subquery2:y2 + TableScan + alias: y2 + HashTable Sink Operator + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + Position of Big Table: 0 + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + TableScan + Union + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: string + null-subquery2:tmp-subquery2:x2 + TableScan + alias: x2 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Union + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: string + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT tmp.key +FROM (SELECT x1.key AS key FROM src1 x1 GROUP BY x1.key + UNION ALL + SELECT x2.key AS key FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)) tmp +ORDER BY tmp.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT tmp.key +FROM (SELECT x1.key AS key FROM src1 x1 GROUP BY x1.key + UNION ALL + SELECT x2.key AS key FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)) tmp +ORDER BY tmp.key +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### + +128 +128 +128 +128 +146 +146 +146 +150 +150 +213 +213 +213 +224 +224 +224 +238 +238 +238 +255 +255 +255 +273 +273 +273 +273 +278 +278 +278 +311 +311 +311 +311 +369 +369 +369 +369 +401 +401 +401 +401 +401 +401 +406 +406 +406 +406 +406 +66 +66 +98 +98 +98 +PREHOOK: query: -- When hive.optimize.mapjoin.mapreduce=false and Correlation Optimizer is disabled, +-- we will use 7 jobs. +-- We will generate one Map-only job for the MapJoin of x1 and y1, +-- one Map-only job for the Map-Join of x2 and y2, +-- one MR job for the aggregation in the sub-query tmp1, +-- one MR job for the aggregation in the sub-query tmp2, +-- one MR job for the Join of tmp1 and tmp2, +-- one MR job for aggregation on the result of the Join of tmp1 and tmp2, +-- and one MR job for the ORDER BY. +EXPLAIN +SELECT tmp1.key as key, count(*) as cnt +FROM (SELECT x1.key AS key + FROM src x1 JOIN src1 y1 ON (x1.key = y1.key) + GROUP BY x1.key) tmp1 +JOIN (SELECT x2.key AS key + FROM src x2 JOIN src1 y2 ON (x2.key = y2.key) + GROUP BY x2.key) tmp2 +ON (tmp1.key = tmp2.key) +GROUP BY tmp1.key +ORDER BY key, cnt +PREHOOK: type: QUERY +POSTHOOK: query: -- When hive.optimize.mapjoin.mapreduce=false and Correlation Optimizer is disabled, +-- we will use 7 jobs. +-- We will generate one Map-only job for the MapJoin of x1 and y1, +-- one Map-only job for the Map-Join of x2 and y2, +-- one MR job for the aggregation in the sub-query tmp1, +-- one MR job for the aggregation in the sub-query tmp2, +-- one MR job for the Join of tmp1 and tmp2, +-- one MR job for aggregation on the result of the Join of tmp1 and tmp2, +-- and one MR job for the ORDER BY. +EXPLAIN +SELECT tmp1.key as key, count(*) as cnt +FROM (SELECT x1.key AS key + FROM src x1 JOIN src1 y1 ON (x1.key = y1.key) + GROUP BY x1.key) tmp1 +JOIN (SELECT x2.key AS key + FROM src x2 JOIN src1 y2 ON (x2.key = y2.key) + GROUP BY x2.key) tmp2 +ON (tmp1.key = tmp2.key) +GROUP BY tmp1.key +ORDER BY key, cnt +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x1) (TOK_TABREF (TOK_TABNAME src1) y1) (= (. (TOK_TABLE_OR_COL x1) key) (. (TOK_TABLE_OR_COL y1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) key) key)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x1) key)))) tmp1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x2) (TOK_TABREF (TOK_TABNAME src1) y2) (= (. (TOK_TABLE_OR_COL x2) key) (. (TOK_TABLE_OR_COL y2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x2) key) key)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x2) key)))) tmp2) (= (. (TOK_TABLE_OR_COL tmp1) key) (. (TOK_TABLE_OR_COL tmp2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmp1) key) key) (TOK_SELEXPR (TOK_FUNCTIONSTAR count) cnt)) (TOK_GROUPBY (. 
(TOK_TABLE_OR_COL tmp1) key)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL cnt))))) + +STAGE DEPENDENCIES: + Stage-17 is a root stage + Stage-13 depends on stages: Stage-17 + Stage-2 depends on stages: Stage-13 + Stage-12 depends on stages: Stage-2, Stage-8 , consists of Stage-15, Stage-16, Stage-3 + Stage-15 has a backup stage: Stage-3 + Stage-10 depends on stages: Stage-15 + Stage-4 depends on stages: Stage-3, Stage-10, Stage-11 + Stage-5 depends on stages: Stage-4 + Stage-16 has a backup stage: Stage-3 + Stage-11 depends on stages: Stage-16 + Stage-3 + Stage-18 is a root stage + Stage-14 depends on stages: Stage-18 + Stage-8 depends on stages: Stage-14 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-17 + Map Reduce Local Work + Alias -> Map Local Tables: + tmp2:y2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + tmp2:y2 + TableScan + alias: y2 + HashTable Sink Operator + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + Position of Big Table: 0 + + Stage: Stage-13 + Map Reduce + Alias -> Map Operator Tree: + tmp2:x2 + TableScan + alias: x2 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Group By Operator + bucketGroup: false + keys: + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Local Work: + Map Reduce Local Work + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-12 + Conditional Operator + + Stage: Stage-15 + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME + HashTable Sink Operator + condition expressions: + 0 {_col0} + 1 + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[_col0]] + Position of Big Table: 0 + + Stage: Stage-10 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME1 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[_col0]] + outputColumnNames: _col0 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + 
GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Local Work: + Map Reduce Local Work + + Stage: Stage-4 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-5 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + sort order: ++ + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-16 + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME1 + HashTable Sink Operator + condition expressions: + 0 {_col0} + 1 + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[_col0]] + Position of Big Table: 1 + + Stage: Stage-11 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[_col0]] + outputColumnNames: _col0 + Position of Big Table: 1 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Local Work: + Map Reduce Local Work + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 1 + $INTNAME1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 0 + value expressions: + expr: _col0 + type: string + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 + handleSkewJoin: false + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: + expr: _col0 + type: string + mode: 
hash + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-18 + Map Reduce Local Work + Alias -> Map Local Tables: + tmp1:y1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + tmp1:y1 + TableScan + alias: y1 + HashTable Sink Operator + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + Position of Big Table: 0 + + Stage: Stage-14 + Map Reduce + Alias -> Map Operator Tree: + tmp1:x1 + TableScan + alias: x1 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Group By Operator + bucketGroup: false + keys: + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Local Work: + Map Reduce Local Work + + Stage: Stage-8 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT tmp1.key as key, count(*) as cnt +FROM (SELECT x1.key AS key + FROM src x1 JOIN src1 y1 ON (x1.key = y1.key) + GROUP BY x1.key) tmp1 +JOIN (SELECT x2.key AS key + FROM src x2 JOIN src1 y2 ON (x2.key = y2.key) + GROUP BY x2.key) tmp2 +ON (tmp1.key = tmp2.key) +GROUP BY tmp1.key +ORDER BY key, cnt +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT tmp1.key as key, count(*) as cnt +FROM (SELECT x1.key AS key + FROM src x1 JOIN src1 y1 ON (x1.key = y1.key) + GROUP BY x1.key) tmp1 +JOIN (SELECT x2.key AS key + FROM src x2 JOIN src1 y2 ON (x2.key = y2.key) + GROUP BY x2.key) tmp2 +ON (tmp1.key = tmp2.key) +GROUP BY tmp1.key +ORDER BY key, cnt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +128 1 +146 1 +150 1 +213 1 +224 1 +238 1 +255 1 +273 1 +278 1 +311 1 +369 1 +401 1 +406 1 +66 1 +98 1 +PREHOOK: query: -- When hive.optimize.mapjoin.mapreduce=true and Correlation Optimizer is enabled, +-- we will use two jobs. This first MR job will evaluate sub-queries of tmp1, tmp2, +-- the Join of tmp1 and tmp2, and the aggregation on the result of the Join of +-- tmp1 and tmp2. The second job will do the ORDER BY. 
+EXPLAIN +SELECT tmp1.key as key, count(*) as cnt +FROM (SELECT x1.key AS key + FROM src x1 JOIN src1 y1 ON (x1.key = y1.key) + GROUP BY x1.key) tmp1 +JOIN (SELECT x2.key AS key + FROM src x2 JOIN src1 y2 ON (x2.key = y2.key) + GROUP BY x2.key) tmp2 +ON (tmp1.key = tmp2.key) +GROUP BY tmp1.key +ORDER BY key, cnt +PREHOOK: type: QUERY +POSTHOOK: query: -- When hive.optimize.mapjoin.mapreduce=true and Correlation Optimizer is enabled, +-- we will use two jobs. This first MR job will evaluate sub-queries of tmp1, tmp2, +-- the Join of tmp1 and tmp2, and the aggregation on the result of the Join of +-- tmp1 and tmp2. The second job will do the ORDER BY. +EXPLAIN +SELECT tmp1.key as key, count(*) as cnt +FROM (SELECT x1.key AS key + FROM src x1 JOIN src1 y1 ON (x1.key = y1.key) + GROUP BY x1.key) tmp1 +JOIN (SELECT x2.key AS key + FROM src x2 JOIN src1 y2 ON (x2.key = y2.key) + GROUP BY x2.key) tmp2 +ON (tmp1.key = tmp2.key) +GROUP BY tmp1.key +ORDER BY key, cnt +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x1) (TOK_TABREF (TOK_TABNAME src1) y1) (= (. (TOK_TABLE_OR_COL x1) key) (. (TOK_TABLE_OR_COL y1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) key) key)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x1) key)))) tmp1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x2) (TOK_TABREF (TOK_TABNAME src1) y2) (= (. (TOK_TABLE_OR_COL x2) key) (. (TOK_TABLE_OR_COL y2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x2) key) key)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x2) key)))) tmp2) (= (. (TOK_TABLE_OR_COL tmp1) key) (. (TOK_TABLE_OR_COL tmp2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmp1) key) key) (TOK_SELEXPR (TOK_FUNCTIONSTAR count) cnt)) (TOK_GROUPBY (. 
(TOK_TABLE_OR_COL tmp1) key)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL cnt))))) + +STAGE DEPENDENCIES: + Stage-9 is a root stage + Stage-2 depends on stages: Stage-9 + Stage-3 depends on stages: Stage-2 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-9 + Map Reduce Local Work + Alias -> Map Local Tables: + tmp1:y1 + Fetch Operator + limit: -1 + tmp2:y2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + tmp1:y1 + TableScan + alias: y1 + HashTable Sink Operator + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + Position of Big Table: 0 + tmp2:y2 + TableScan + alias: y2 + HashTable Sink Operator + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + Position of Big Table: 0 + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + tmp1:x1 + TableScan + alias: x1 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Group By Operator + bucketGroup: false + keys: + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 0 + tmp2:x2 + TableScan + alias: x2 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Group By Operator + bucketGroup: false + keys: + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 1 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Demux Operator + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Mux Operator + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 + handleSkewJoin: false + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Mux Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: + expr: _col0 + type: string + mode: complete + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Mux Operator + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 
0 {VALUE._col0} + 1 + handleSkewJoin: false + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Mux Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: + expr: _col0 + type: string + mode: complete + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + sort order: ++ + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT tmp1.key as key, count(*) as cnt +FROM (SELECT x1.key AS key + FROM src x1 JOIN src1 y1 ON (x1.key = y1.key) + GROUP BY x1.key) tmp1 +JOIN (SELECT x2.key AS key + FROM src x2 JOIN src1 y2 ON (x2.key = y2.key) + GROUP BY x2.key) tmp2 +ON (tmp1.key = tmp2.key) +GROUP BY tmp1.key +ORDER BY key, cnt +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT tmp1.key as key, count(*) as cnt +FROM (SELECT x1.key AS key + FROM src x1 JOIN src1 y1 ON (x1.key = y1.key) + GROUP BY x1.key) tmp1 +JOIN (SELECT x2.key AS key + FROM src x2 JOIN src1 y2 ON (x2.key = y2.key) + GROUP BY x2.key) tmp2 +ON (tmp1.key = tmp2.key) +GROUP BY tmp1.key +ORDER BY key, cnt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +128 1 +146 1 +150 1 +213 1 +224 1 +238 1 +255 1 +273 1 +278 1 +311 1 +369 1 +401 1 +406 1 +66 1 +98 1 +PREHOOK: query: -- When hive.optimize.mapjoin.mapreduce=false and Correlation Optimizer is disabled, +-- we will use six jobs. +-- We will generate one MR job to evaluate the sub-query tmp1, +-- one Map-only job for the Map-Join of x2 and y2, +-- one MR job for the aggregation in the sub-query tmp2, +-- one MR job for the Join of tmp1 and tmp2, +-- one MR job for aggregation on the result of the Join of tmp1 and tmp2, +-- and one MR job for the ORDER BY. +EXPLAIN +SELECT tmp1.key as key, count(*) as cnt +FROM (SELECT x1.key AS key + FROM src1 x1 + GROUP BY x1.key) tmp1 +JOIN (SELECT x2.key AS key + FROM src x2 JOIN src1 y2 ON (x2.key = y2.key) + GROUP BY x2.key) tmp2 +ON (tmp1.key = tmp2.key) +GROUP BY tmp1.key +ORDER BY key, cnt +PREHOOK: type: QUERY +POSTHOOK: query: -- When hive.optimize.mapjoin.mapreduce=false and Correlation Optimizer is disabled, +-- we will use six jobs. +-- We will generate one MR job to evaluate the sub-query tmp1, +-- one Map-only job for the Map-Join of x2 and y2, +-- one MR job for the aggregation in the sub-query tmp2, +-- one MR job for the Join of tmp1 and tmp2, +-- one MR job for aggregation on the result of the Join of tmp1 and tmp2, +-- and one MR job for the ORDER BY.
+EXPLAIN +SELECT tmp1.key as key, count(*) as cnt +FROM (SELECT x1.key AS key + FROM src1 x1 + GROUP BY x1.key) tmp1 +JOIN (SELECT x2.key AS key + FROM src x2 JOIN src1 y2 ON (x2.key = y2.key) + GROUP BY x2.key) tmp2 +ON (tmp1.key = tmp2.key) +GROUP BY tmp1.key +ORDER BY key, cnt +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) x1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) key) key)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x1) key)))) tmp1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x2) (TOK_TABREF (TOK_TABNAME src1) y2) (= (. (TOK_TABLE_OR_COL x2) key) (. (TOK_TABLE_OR_COL y2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x2) key) key)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x2) key)))) tmp2) (= (. (TOK_TABLE_OR_COL tmp1) key) (. (TOK_TABLE_OR_COL tmp2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmp1) key) key) (TOK_SELEXPR (TOK_FUNCTIONSTAR count) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL tmp1) key)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL cnt))))) + +STAGE DEPENDENCIES: + Stage-7 is a root stage + Stage-10 depends on stages: Stage-2, Stage-7 , consists of Stage-12, Stage-13, Stage-3 + Stage-12 has a backup stage: Stage-3 + Stage-8 depends on stages: Stage-12 + Stage-4 depends on stages: Stage-3, Stage-8, Stage-9 + Stage-5 depends on stages: Stage-4 + Stage-13 has a backup stage: Stage-3 + Stage-9 depends on stages: Stage-13 + Stage-3 + Stage-14 is a root stage + Stage-11 depends on stages: Stage-14 + Stage-2 depends on stages: Stage-11 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-7 + Map Reduce + Alias -> Map Operator Tree: + tmp1:x1 + TableScan + alias: x1 + Select Operator + expressions: + expr: key + type: string + outputColumnNames: key + Group By Operator + bucketGroup: false + keys: + expr: key + type: string + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-10 + Conditional Operator + + Stage: Stage-12 + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME + HashTable Sink Operator + condition expressions: + 0 {_col0} + 1 + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[_col0]] + Position of Big Table: 0 + + Stage: Stage-8 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME1 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[_col0]] + outputColumnNames: _col0 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Group By Operator + 
aggregations: + expr: count() + bucketGroup: false + keys: + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Local Work: + Map Reduce Local Work + + Stage: Stage-4 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-5 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + sort order: ++ + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-13 + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME1 + HashTable Sink Operator + condition expressions: + 0 {_col0} + 1 + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[_col0]] + Position of Big Table: 1 + + Stage: Stage-9 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[_col0]] + outputColumnNames: _col0 + Position of Big Table: 1 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Local Work: + Map Reduce Local Work + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 1 + $INTNAME1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 0 + value expressions: + expr: _col0 + type: string + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 + handleSkewJoin: false + outputColumnNames: _col0 + Select Operator + 
expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-14 + Map Reduce Local Work + Alias -> Map Local Tables: + tmp2:y2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + tmp2:y2 + TableScan + alias: y2 + HashTable Sink Operator + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + Position of Big Table: 0 + + Stage: Stage-11 + Map Reduce + Alias -> Map Operator Tree: + tmp2:x2 + TableScan + alias: x2 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Group By Operator + bucketGroup: false + keys: + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Local Work: + Map Reduce Local Work + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT tmp1.key as key, count(*) as cnt +FROM (SELECT x1.key AS key + FROM src1 x1 + GROUP BY x1.key) tmp1 +JOIN (SELECT x2.key AS key + FROM src x2 JOIN src1 y2 ON (x2.key = y2.key) + GROUP BY x2.key) tmp2 +ON (tmp1.key = tmp2.key) +GROUP BY tmp1.key +ORDER BY key, cnt +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT tmp1.key as key, count(*) as cnt +FROM (SELECT x1.key AS key + FROM src1 x1 + GROUP BY x1.key) tmp1 +JOIN (SELECT x2.key AS key + FROM src x2 JOIN src1 y2 ON (x2.key = y2.key) + GROUP BY x2.key) tmp2 +ON (tmp1.key = tmp2.key) +GROUP BY tmp1.key +ORDER BY key, cnt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +128 1 +146 1 +150 1 +213 1 +224 1 +238 1 +255 1 +273 1 +278 1 +311 1 +369 1 +401 1 +406 1 +66 1 +98 1 +PREHOOK: query: -- When hive.optimize.mapjoin.mapreduce=true and Correlation Optimizer is enabled, +-- we will use two jobs. This first MR job will evaluate sub-queries of tmp1, tmp2, +-- the Join of tmp1 and tmp2, and the aggregation on the result of the Join of +-- tmp1 and tmp2. The second job will do the ORDER BY.
+EXPLAIN +SELECT tmp1.key as key, count(*) as cnt +FROM (SELECT x1.key AS key + FROM src1 x1 + GROUP BY x1.key) tmp1 +JOIN (SELECT x2.key AS key + FROM src x2 JOIN src1 y2 ON (x2.key = y2.key) + GROUP BY x2.key) tmp2 +ON (tmp1.key = tmp2.key) +GROUP BY tmp1.key +ORDER BY key, cnt +PREHOOK: type: QUERY +POSTHOOK: query: -- When hive.optimize.mapjoin.mapreduce=true and Correlation Optimizer is enabled, +-- we will use two jobs. This first MR job will evaluate sub-queries of tmp1, tmp2, +-- the Join of tmp1 and tmp2, and the aggregation on the result of the Join of +-- tmp1 and tmp2. The second job will do the ORDER BY. +EXPLAIN +SELECT tmp1.key as key, count(*) as cnt +FROM (SELECT x1.key AS key + FROM src1 x1 + GROUP BY x1.key) tmp1 +JOIN (SELECT x2.key AS key + FROM src x2 JOIN src1 y2 ON (x2.key = y2.key) + GROUP BY x2.key) tmp2 +ON (tmp1.key = tmp2.key) +GROUP BY tmp1.key +ORDER BY key, cnt +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) x1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) key) key)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x1) key)))) tmp1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x2) (TOK_TABREF (TOK_TABNAME src1) y2) (= (. (TOK_TABLE_OR_COL x2) key) (. (TOK_TABLE_OR_COL y2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x2) key) key)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x2) key)))) tmp2) (= (. (TOK_TABLE_OR_COL tmp1) key) (. (TOK_TABLE_OR_COL tmp2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmp1) key) key) (TOK_SELEXPR (TOK_FUNCTIONSTAR count) cnt)) (TOK_GROUPBY (.
(TOK_TABLE_OR_COL tmp1) key)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL cnt))))) + +STAGE DEPENDENCIES: + Stage-7 is a root stage + Stage-2 depends on stages: Stage-7 + Stage-3 depends on stages: Stage-2 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + tmp2:y2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + tmp2:y2 + TableScan + alias: y2 + HashTable Sink Operator + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + Position of Big Table: 0 + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + tmp1:x1 + TableScan + alias: x1 + Select Operator + expressions: + expr: key + type: string + outputColumnNames: key + Group By Operator + bucketGroup: false + keys: + expr: key + type: string + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 0 + tmp2:x2 + TableScan + alias: x2 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Group By Operator + bucketGroup: false + keys: + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 1 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Demux Operator + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Mux Operator + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 + handleSkewJoin: false + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Mux Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: + expr: _col0 + type: string + mode: complete + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Mux Operator + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 + handleSkewJoin: false + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Mux Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: + expr: _col0 + type: string + mode: complete + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, 
_col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + sort order: ++ + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT tmp1.key as key, count(*) as cnt +FROM (SELECT x1.key AS key + FROM src1 x1 + GROUP BY x1.key) tmp1 +JOIN (SELECT x2.key AS key + FROM src x2 JOIN src1 y2 ON (x2.key = y2.key) + GROUP BY x2.key) tmp2 +ON (tmp1.key = tmp2.key) +GROUP BY tmp1.key +ORDER BY key, cnt +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT tmp1.key as key, count(*) as cnt +FROM (SELECT x1.key AS key + FROM src1 x1 + GROUP BY x1.key) tmp1 +JOIN (SELECT x2.key AS key + FROM src x2 JOIN src1 y2 ON (x2.key = y2.key) + GROUP BY x2.key) tmp2 +ON (tmp1.key = tmp2.key) +GROUP BY tmp1.key +ORDER BY key, cnt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +128 1 +146 1 +150 1 +213 1 +224 1 +238 1 +255 1 +273 1 +278 1 +311 1 +369 1 +401 1 +406 1 +66 1 +98 1
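
For reference, the plan variations above can be reproduced from a Hive session by toggling the flags the test comments mention; a minimal sketch follows. Only hive.optimize.mapjoin.mapreduce is named in this output -- hive.auto.convert.join and hive.optimize.correlation are assumed flag names for map-join conversion and the Correlation Optimizer, and the exact plan shapes depend on the Hive version and small-table sizes.

-- Minimal repro sketch; flag names other than hive.optimize.mapjoin.mapreduce are assumptions.
set hive.auto.convert.join=true;            -- convert common joins to map-joins (assumed flag)
set hive.optimize.mapjoin.mapreduce=true;   -- merge a map-join task into the MR task that follows it
set hive.optimize.correlation=true;         -- enable the Correlation Optimizer (assumed flag)

EXPLAIN
SELECT tmp1.key AS key, count(*) AS cnt
FROM (SELECT x1.key AS key
      FROM src1 x1
      GROUP BY x1.key) tmp1
JOIN (SELECT x2.key AS key
      FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)
      GROUP BY x2.key) tmp2
ON (tmp1.key = tmp2.key)
GROUP BY tmp1.key
ORDER BY key, cnt;

With all three settings on, this should produce the two-job plan shown last (one MR job covering the map-join, both aggregations, and the join of tmp1 and tmp2, plus one MR job for the ORDER BY); with hive.optimize.mapjoin.mapreduce=false and the Correlation Optimizer disabled, the six-job plan shown earlier is expected instead.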