Index: ql/src/java/org/apache/hadoop/hive/ql/exec/DummyStoreOperator.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/DummyStoreOperator.java (revision 1447881)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/DummyStoreOperator.java (working copy)
@@ -95,4 +95,9 @@
   public OperatorType getType() {
     return OperatorType.FORWARD;
   }
+
+  @Override
+  public boolean columnNamesRowResolvedCanBeObtained() {
+    return true;
+  }
 }
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java (revision 1447881)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java (working copy)
@@ -703,9 +703,10 @@
     // traverse all the joins and convert them if necessary
     if (pGraphContext.getJoinContext() != null) {
       Map<JoinOperator, QBJoinTree> joinMap = new HashMap<JoinOperator, QBJoinTree>();
-      Map<MapJoinOperator, QBJoinTree> mapJoinMap = pGraphContext.getMapJoinContext();
+      Map<AbstractMapJoinOperator<? extends MapJoinDesc>, QBJoinTree> mapJoinMap =
+          pGraphContext.getMapJoinContext();
       if (mapJoinMap == null) {
-        mapJoinMap = new HashMap<MapJoinOperator, QBJoinTree>();
+        mapJoinMap = new HashMap<AbstractMapJoinOperator<? extends MapJoinDesc>, QBJoinTree>();
         pGraphContext.setMapJoinContext(mapJoinMap);
       }
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java (revision 1447881)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java (working copy)
@@ -32,7 +32,6 @@
 import org.apache.hadoop.hive.ql.exec.FetchTask;
 import org.apache.hadoop.hive.ql.exec.GroupByOperator;
 import org.apache.hadoop.hive.ql.exec.JoinOperator;
-import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
@@ -71,7 +70,7 @@
   private HashMap<String, Operator<? extends OperatorDesc>> topSelOps;
   private LinkedHashMap<Operator<? extends OperatorDesc>, OpParseContext> opParseCtx;
   private Map<JoinOperator, QBJoinTree> joinContext;
-  private Map<MapJoinOperator, QBJoinTree> mapJoinContext;
+  private Map<AbstractMapJoinOperator<? extends MapJoinDesc>, QBJoinTree> mapJoinContext;
   private Map<SMBMapJoinOperator, QBJoinTree> smbMapJoinContext;
   private HashMap<TableScanOperator, Table> topToTable;
   private HashMap<String, SplitSample> nameToSplitSample;
@@ -524,11 +523,12 @@
     return lInfo;
   }
 
-  public Map<MapJoinOperator, QBJoinTree> getMapJoinContext() {
+  public Map<AbstractMapJoinOperator<? extends MapJoinDesc>, QBJoinTree> getMapJoinContext() {
     return mapJoinContext;
   }
 
-  public void setMapJoinContext(Map<MapJoinOperator, QBJoinTree> mapJoinContext) {
+  public void setMapJoinContext(Map<AbstractMapJoinOperator<? extends MapJoinDesc>,
+      QBJoinTree> mapJoinContext) {
     this.mapJoinContext = mapJoinContext;
   }
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (revision 1447881)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (working copy)
@@ -8474,17 +8474,17 @@
         opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks,
         opToPartToSkewedPruner, viewAliasToInput);
 
+    Optimizer optm = new Optimizer();
+    optm.setPctx(pCtx);
+    optm.initialize(conf);
+    pCtx = optm.optimize();
+
     // Generate table access stats if required
     if (HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_TABLEKEYS) == true) {
       TableAccessAnalyzer tableAccessAnalyzer = new TableAccessAnalyzer(pCtx);
       setTableAccessInfo(tableAccessAnalyzer.analyzeTableAccess());
     }
 
-    Optimizer optm = new Optimizer();
-    optm.setPctx(pCtx);
-    optm.initialize(conf);
-    pCtx = optm.optimize();
-
     // Generate column access stats if required - wait until column pruning takes place
     // during optimization
     if (HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_SCANCOLS) == true) {
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/TableAccessAnalyzer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/TableAccessAnalyzer.java (revision 1447881)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/TableAccessAnalyzer.java (working copy)
@@ -26,11 +26,13 @@
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator;
 import org.apache.hadoop.hive.ql.exec.GroupByOperator;
 import org.apache.hadoop.hive.ql.exec.JoinOperator;
 import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator;
 import org.apache.hadoop.hive.ql.exec.SelectOperator;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
@@ -42,11 +44,15 @@
 import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
 import org.apache.hadoop.hive.ql.lib.Rule;
 import org.apache.hadoop.hive.ql.lib.RuleRegExp;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.Partition;
 import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc;
+import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.SelectDesc;
@@ -80,7 +86,9 @@
     opRules.put(new RuleRegExp("R2", JoinOperator.getOperatorName() + "%"),
         new JoinProcessor(pGraphContext));
     opRules.put(new RuleRegExp("R3", MapJoinOperator.getOperatorName() + "%"),
-        new JoinProcessor(pGraphContext));
+        new MapJoinProcessor(pGraphContext));
+    opRules.put(new RuleRegExp("R4", SMBMapJoinOperator.getOperatorName() + "%"),
+        new MapJoinProcessor(pGraphContext));
 
     TableAccessCtx tableAccessCtx = new TableAccessCtx();
     Dispatcher disp = new DefaultRuleDispatcher(getDefaultProc(), opRules, tableAccessCtx);
@@ -116,7 +124,7 @@
     @Override
     public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
-        Object... nodeOutputs) {
+        Object... nodeOutputs) throws SemanticException {
       GroupByOperator op = (GroupByOperator)nd;
       TableAccessCtx tableAccessCtx = (TableAccessCtx)procCtx;
@@ -142,14 +150,83 @@
       }
 
       Map<String, List<String>> tableToKeysMap = new HashMap<String, List<String>>();
+      Map<Table, List<Partition>> tableToPartitionsMap = new HashMap<Table, List<Partition>>();
       Table tbl = pGraphContext.getTopToTable().get(tso);
       tableToKeysMap.put(tbl.getCompleteName(), keyColNames);
-      tableAccessCtx.addOperatorTableAccess(op, tableToKeysMap);
+      tableToPartitionsMap.put(tbl, getPartitions(tso, tbl, tbl.getTableName()));
+      tableAccessCtx.addOperatorTableAccess(op, tableToKeysMap, tableToPartitionsMap);
 
       return null;
     }
   }
 
+  public class MapJoinProcessor implements NodeProcessor {
+    protected ParseContext pGraphContext;
+
+    public MapJoinProcessor(ParseContext pGraphContext) {
+      this.pGraphContext = pGraphContext;
+    }
+
+    @Override
+    @SuppressWarnings("unchecked")
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+
+      AbstractMapJoinOperator<? extends MapJoinDesc> op =
+          (AbstractMapJoinOperator<? extends MapJoinDesc>)nd;
+
+      QBJoinTree joinTree = pGraphContext.getMapJoinContext().get(op);
+      List<Operator<? extends OperatorDesc>> parentOps =
+          ((AbstractMapJoinOperator<? extends MapJoinDesc>)nd).getParentOperators();
+
+      TableAccessCtx tableAccessCtx = (TableAccessCtx)procCtx;
+      Map<String, List<String>> tableToKeysMap = new HashMap<String, List<String>>();
+      Map<Table, List<Partition>> tableToPartitionsMap = new HashMap<Table, List<Partition>>();
+
+      // Get the key column names for each side of the join,
+      // and check if the keys are all constants
+      // or columns (not expressions). If yes, proceed.
+      assert(parentOps.size() == joinTree.getBaseSrc().length);
+      byte pos = 0;
+      for (String src : joinTree.getBaseSrc()) {
+        if (src != null) {
+
+          // Get the key column names, and check if the keys are all constants
+          // or columns (not expressions). If yes, proceed.
+          List<String> keyColNames =
+              TableAccessAnalyzer.getKeyColNames(op.getConf().getKeys().get(pos));
+
+          if (keyColNames == null) {
+            // we are done, since there are no keys to check for
+            return null;
+          }
+
+          // Walk the operator tree to the TableScan and build the mapping
+          // along the way for the columns that the map join uses as keys
+          TableScanOperator tso = TableAccessAnalyzer.genRootTableScan(
+              op.getParentOperators().get(pos), keyColNames);
+
+          if (tso == null) {
+            // Could not find an allowed path to a table scan operator,
+            // hence we are done
+            return null;
+          }
+
+          Table tbl = pGraphContext.getTopToTable().get(tso);
+          tableToKeysMap.put(tbl.getCompleteName(), keyColNames);
+          tableToPartitionsMap.put(tbl, getPartitions(tso, tbl, src));
+        } else {
+          return null;
+        }
+        pos++;
+      }
+
+      // We only get here if we could map all join keys to source table columns
+      tableAccessCtx.addOperatorTableAccess(op, tableToKeysMap, tableToPartitionsMap);
+      return null;
+    }
+  }
+
   /**
    * Processor for Join operator.
    */
@@ -162,17 +239,20 @@
     @Override
     public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
-        Object... nodeOutputs) {
+        Object... nodeOutputs) throws SemanticException {
+
       JoinOperator op = (JoinOperator)nd;
+
+      QBJoinTree joinTree = pGraphContext.getJoinContext().get(op);
+      List<Operator<? extends OperatorDesc>> parentOps = op.getParentOperators();
+
       TableAccessCtx tableAccessCtx = (TableAccessCtx)procCtx;
       Map<String, List<String>> tableToKeysMap = new HashMap<String, List<String>>();
+      Map<Table, List<Partition>> tableToPartitionsMap = new HashMap<Table, List<Partition>>();
 
-      List<Operator<? extends OperatorDesc>> parentOps = op.getParentOperators();
-
       // Get the key column names for each side of the join,
       // and check if the keys are all constants
      // or columns (not expressions). If yes, proceed.
-      QBJoinTree joinTree = pGraphContext.getJoinContext().get(op);
       assert(parentOps.size() == joinTree.getBaseSrc().length);
       int pos = 0;
       for (String src : joinTree.getBaseSrc()) {
@@ -191,7 +271,7 @@
           }
 
           // Walk the operator tree to the TableScan and build the mapping
-          // along the way for the columns that the group by uses as keys
+          // along the way for the columns that the join uses as keys
           TableScanOperator tso = TableAccessAnalyzer.genRootTableScan(
              reduceSinkOp.getParentOperators().get(0), keyColNames);
@@ -203,6 +283,7 @@
           Table tbl = pGraphContext.getTopToTable().get(tso);
           tableToKeysMap.put(tbl.getCompleteName(), keyColNames);
+          tableToPartitionsMap.put(tbl, getPartitions(tso, tbl, src));
         } else {
           return null;
         }
@@ -210,7 +291,7 @@
       }
 
       // We only get here if we could map all join keys to source table columns
-      tableAccessCtx.addOperatorTableAccess(op, tableToKeysMap);
+      tableAccessCtx.addOperatorTableAccess(op, tableToKeysMap, tableToPartitionsMap);
       return null;
     }
   }
@@ -325,4 +406,38 @@
     }
     return colList;
   }
+
+  /**
+   * If the table is partitioned, returns a list of partitions that were not pruned, otherwise
+   * returns null.
+   *
+   * @param tso
+   * @param tbl
+   * @param alias
+   * @return
+   * @throws SemanticException
+   */
+  private List<Partition> getPartitions(TableScanOperator tso, Table tbl, String alias)
+      throws SemanticException {
+
+    if (tbl.isPartitioned()) {
+      // If the table is partitioned get a list of the partitions that weren't pruned
+      PrunedPartitionList prunedParts = pGraphContext.getOpToPartList().get(tso);
+      if (prunedParts == null) {
+        try {
+          prunedParts = PartitionPruner.prune(tbl, pGraphContext.getOpToPartPruner().get(tso),
+              pGraphContext.getConf(), alias,
+              pGraphContext.getPrunedPartitions());
+        } catch (HiveException e) {
+          LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
+          throw new SemanticException(e.getMessage(), e);
+        }
+        pGraphContext.getOpToPartList().put(tso, prunedParts);
+      }
+      return prunedParts.getNotDeniedPartns();
+    } else {
+      // The table isn't partitioned, return null
+      return null;
+    }
+  }
 }
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/TableAccessCtx.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/TableAccessCtx.java (revision 1447881)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/TableAccessCtx.java (working copy)
@@ -23,6 +23,8 @@
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.metadata.Partition;
+import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 
 /**
@@ -48,10 +50,11 @@
   }
 
   public void addOperatorTableAccess(Operator<? extends OperatorDesc> op,
-      Map<String, List<String>> tableToKeysMap) {
+      Map<String, List<String>> tableToKeysMap, Map<Table, List<Partition>> inputsMap) {
     assert(tableToKeysMap != null);
     assert(op != null);
-    tableAccessInfo.add(op, tableToKeysMap);
+    tableAccessInfo.addTableToKeysMap(op, tableToKeysMap);
+    tableAccessInfo.addTableToPartitionsMap(op, inputsMap);
   }
 }
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/TableAccessInfo.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/TableAccessInfo.java (revision 1447881)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/TableAccessInfo.java (working copy)
@@ -23,6 +23,8 @@
 import java.util.Map;
 
 import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.metadata.Partition;
+import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 
 public class TableAccessInfo {
@@ -30,20 +32,34 @@
    * Map of operator id to table and key names.
    */
   private final Map<Operator<? extends OperatorDesc>,
-    Map<String, List<String>>> operatorToTableAccessMap;
+      Map<String, List<String>>> operatorToTableAccessMap;
+  private final Map<Operator<? extends OperatorDesc>,
+      Map<Table, List<Partition>>> operatorToInputsMap;
 
   public TableAccessInfo() {
     operatorToTableAccessMap =
-      new HashMap<Operator<? extends OperatorDesc>, Map<String, List<String>>>();
+        new HashMap<Operator<? extends OperatorDesc>, Map<String, List<String>>>();
+    operatorToInputsMap =
+        new HashMap<Operator<? extends OperatorDesc>, Map<Table, List<Partition>>>();
   }
 
-  public void add(Operator<? extends OperatorDesc> op,
+  public void addTableToKeysMap(Operator<? extends OperatorDesc> op,
       Map<String, List<String>> tableToKeysMap) {
     operatorToTableAccessMap.put(op, tableToKeysMap);
   }
+  public void addTableToPartitionsMap(Operator<? extends OperatorDesc> op,
+      Map<Table, List<Partition>> inputsMap) {
+    operatorToInputsMap.put(op, inputsMap);
+  }
+
   public Map<Operator<? extends OperatorDesc>,
-    Map<String, List<String>>> getOperatorToTableAccessMap() {
+      Map<String, List<String>>> getOperatorToTableAccessMap() {
     return operatorToTableAccessMap;
   }
+
+  public Map<Operator<? extends OperatorDesc>,
+      Map<Table, List<Partition>>> getOperatorToInputsMap() {
+    return operatorToInputsMap;
+  }
 }
Index: ql/src/test/org/apache/hadoop/hive/ql/hooks/CheckTableAccessHook.java
===================================================================
--- ql/src/test/org/apache/hadoop/hive/ql/hooks/CheckTableAccessHook.java (revision 1447881)
+++ ql/src/test/org/apache/hadoop/hive/ql/hooks/CheckTableAccessHook.java (working copy)
@@ -17,20 +17,21 @@
  */
 package org.apache.hadoop.hive.ql.hooks;
 
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
-import java.util.HashMap;
 
 import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.QueryPlan;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.metadata.Partition;
+import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.parse.TableAccessInfo;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
-import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.plan.OperatorDesc;
-
 /*
  * This hook is used for verifying the table access key information
  * that is generated and maintained in the QueryPlan object by the
@@ -59,11 +60,11 @@
     LogHelper console = SessionState.getConsole();
     Map<Operator<? extends OperatorDesc>, Map<String, List<String>>> operatorToTableAccessMap =
-      tableAccessInfo.getOperatorToTableAccessMap();
+        tableAccessInfo.getOperatorToTableAccessMap();
 
     // We need a new map to ensure output is always produced in the same order.
    // This makes tests that use this hook deterministic.
-    Map<String, String> outputOrderedMap = new HashMap<String, String>();
+    Map<String, String> outputKeysOrderedMap = new HashMap<String, String>();
 
     for (Map.Entry<Operator<? extends OperatorDesc>, Map<String, List<String>>> tableAccess:
         operatorToTableAccessMap.entrySet()) {
@@ -75,11 +76,44 @@
         perOperatorInfo.append("Keys:").append(StringUtils.join(entry.getValue(), ','))
             .append("\n");
       }
-      outputOrderedMap.put(tableAccess.getKey().getOperatorId(), perOperatorInfo.toString());
+      outputKeysOrderedMap.put(tableAccess.getKey().getOperatorId(), perOperatorInfo.toString());
     }
 
-    for (String perOperatorInfo: outputOrderedMap.values()) {
+    // Construct a string showing for each operator in the TableAccessInfo the input tables and
+    // partitions
+    Map<Operator<? extends OperatorDesc>, Map<Table, List<Partition>>> operatorToInputsMap =
+        tableAccessInfo.getOperatorToInputsMap();
+
+    Map<String, String> outputInputsOrderedMap = new HashMap<String, String>();
+
+    for (Map.Entry<Operator<? extends OperatorDesc>, Map<Table, List<Partition>>> input :
+        operatorToInputsMap.entrySet()) {
+      StringBuilder perOperatorInputs = new StringBuilder();
+      perOperatorInputs.append("Operator:").append(input.getKey().getOperatorId()).append("\n");
+      for (Map.Entry<Table, List<Partition>> entry : input.getValue().entrySet()) {
+        perOperatorInputs.append("Table:").append(entry.getKey().getTableName()).append("\n");
+        List<Partition> partitions = entry.getValue();
+        if (partitions != null && !partitions.isEmpty()) {
+          perOperatorInputs.append("Partitions:");
+          for (int index = 0; index < partitions.size(); index++) {
+            perOperatorInputs.append(partitions.get(index).getName());
+            if (index + 1 < partitions.size()) {
+              perOperatorInputs.append(",");
+            }
+          }
+          perOperatorInputs.append("\n");
+        }
+      }
+      outputInputsOrderedMap.put(input.getKey().getOperatorId(), perOperatorInputs.toString());
+    }
+
+
+    for (String perOperatorInfo: outputKeysOrderedMap.values()) {
       console.printError(perOperatorInfo);
     }
+
+    for (String perOperatorInput : outputInputsOrderedMap.values()) {
+      console.printError(perOperatorInput);
+    }
   }
 }
Index: ql/src/test/queries/clientpositive/table_access_keys_stats2.q
===================================================================
--- ql/src/test/queries/clientpositive/table_access_keys_stats2.q (revision 0)
+++ ql/src/test/queries/clientpositive/table_access_keys_stats2.q (working copy)
@@ -0,0 +1,56 @@
+SET hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.CheckTableAccessHook;
+SET hive.stats.collect.tablekeys=true;
+
+-- This test is used for testing the TableAccessAnalyzer
+
+CREATE TABLE t1 (key STRING, val STRING) PARTITIONED BY (part STRING)
+CLUSTERED BY (key) SORTED BY (key ASC) INTO 2 BUCKETS;
+
+CREATE TABLE t2 (key STRING, val STRING) PARTITIONED BY (part STRING)
+CLUSTERED BY (key) SORTED BY (key ASC) INTO 2 BUCKETS;
+
+SET hive.enforce.bucketing=true;
+SET hive.enforce.sorting=true;
+
+LOAD DATA LOCAL INPATH '../data/files/srcbucket0.txt' INTO TABLE t1 PARTITION (part = '1');
+LOAD DATA LOCAL INPATH '../data/files/srcbucket1.txt' INTO TABLE t1 PARTITION (part = '1');
+LOAD DATA LOCAL INPATH '../data/files/srcbucket0.txt' INTO TABLE t1 PARTITION (part = '2');
+LOAD DATA LOCAL INPATH '../data/files/srcbucket1.txt' INTO TABLE t1 PARTITION (part = '2');
+LOAD DATA LOCAL INPATH '../data/files/srcbucket0.txt' INTO TABLE t2 PARTITION (part = '1');
+LOAD DATA LOCAL INPATH '../data/files/srcbucket1.txt' INTO TABLE t2 PARTITION (part = '1');
+LOAD DATA LOCAL INPATH '../data/files/srcbucket0.txt' INTO TABLE t2 PARTITION (part = '2');
+LOAD DATA LOCAL INPATH '../data/files/srcbucket1.txt' INTO TABLE t2 PARTITION (part = '2');
+
+-- Test multiple partitions are stored as inputs
+SELECT count(*) FROM t1 JOIN t2 ON t1.key = t2.key AND
+t1.part IS NOT NULL AND t2.part IS NOT NULL;
+
+
+set
hive.optimize.bucketmapjoin = true; + +-- Test bucket map join +EXPLAIN +SELECT /*+ MAPJOIN(t1) */ count(*) FROM t1 JOIN t2 ON t1.key = t2.key AND +t1.part IS NOT NULL AND t2.part IS NOT NULL; + +SELECT /*+ MAPJOIN(t1) */ count(*) FROM t1 JOIN t2 ON t1.key = t2.key AND +t1.part IS NOT NULL AND t2.part IS NOT NULL; + +set hive.optimize.bucketmapjoin.sortedmerge = true; + +-- Test sort merge join +EXPLAIN +SELECT /*+ MAPJOIN(t1) */ count(*) FROM t1 JOIN t2 ON t1.key = t2.key AND +t1.part IS NOT NULL AND t2.part IS NOT NULL; + +SELECT /*+ MAPJOIN(t1) */ count(*) FROM t1 JOIN t2 ON t1.key = t2.key AND +t1.part IS NOT NULL AND t2.part IS NOT NULL; + +set hive.map.groupby.sorted = true; + +--Test map group by + +EXPLAIN +SELECT key, count(*) FROM t1 WHERE key < 3 GROUP BY key; + +SELECT key, count(*) FROM t1 WHERE key < 3 GROUP BY key; Index: ql/src/test/results/clientpositive/table_access_keys_stats.q.out =================================================================== --- ql/src/test/results/clientpositive/table_access_keys_stats.q.out (revision 1447881) +++ ql/src/test/results/clientpositive/table_access_keys_stats.q.out (working copy) @@ -18,6 +18,9 @@ Table:default@t1 Keys:key +Operator:GBY_2 +Table:t1 + 1 1 2 1 3 1 @@ -31,6 +34,9 @@ Table:default@t1 Keys:key,val +Operator:GBY_2 +Table:t1 + 1 11 1 2 12 1 3 13 1 @@ -46,6 +52,9 @@ Table:default@t1 Keys:key +Operator:GBY_3 +Table:t1 + 1 1 2 1 3 1 @@ -59,6 +68,9 @@ Table:default@t1 Keys:key +Operator:GBY_3 +Table:t1 + 1 1 2 1 3 1 @@ -73,6 +85,9 @@ Table:default@t1 Keys:key +Operator:GBY_2 +Table:t1 + 1 1 1 1 2 1 1 3 1 @@ -86,6 +101,9 @@ Table:default@t1 Keys:key,val +Operator:GBY_2 +Table:t1 + 1 1 11 1 2 1 12 1 3 1 13 1 @@ -100,6 +118,9 @@ Table:default@t1 Keys:key,val +Operator:GBY_2 +Table:t1 + 1 1 11 2 1 2 1 12 2 1 3 1 13 2 1 @@ -126,6 +147,9 @@ Table:default@t1 Keys:key +Operator:GBY_2 +Table:t1 + 2.0 1 4.0 1 6.0 1 @@ -148,6 +172,12 @@ Table:default@t1 Keys:key +Operator:GBY_2 +Table:t1 + +Operator:GBY_8 +Table:t1 + 1 1 2 1 3 1 @@ -175,6 +205,12 @@ Table:default@t1 Keys:key +Operator:GBY_2 +Table:t1 + +Operator:GBY_8 +Table:t1 + 1 1 1 1 2 1 2 1 3 1 3 1 @@ -197,6 +233,12 @@ Table:default@t1 Keys:key +Operator:GBY_2 +Table:t1 + +Operator:GBY_8 +Table:t1 + 1 1 1 11 1 2 1 2 12 1 3 1 3 13 1 @@ -214,6 +256,9 @@ Table:default@t1 Keys:key,val +Operator:GBY_3 +Table:t1 + 1 1 11 1 2 1 12 1 3 1 13 1 @@ -238,6 +283,9 @@ Table:default@t1 Keys:key,val +Operator:GBY_4 +Table:t1 + 1 2 11 1 2 2 12 1 3 2 13 1 @@ -260,6 +308,12 @@ Table:default@t1 Keys:key +Operator:GBY_2 +Table:t1 + +Operator:GBY_8 +Table:t1 + PREHOOK: query: -- simple joins SELECT * FROM T1 JOIN T2 @@ -275,6 +329,10 @@ Table:default@t1 Keys:key +Operator:JOIN_4 +Table:t2 +Table:t1 + 1 11 1 1 2 12 2 1 3 13 3 1 @@ -294,6 +352,10 @@ Table:default@t1 Keys:key,val +Operator:JOIN_4 +Table:t2 +Table:t1 + PREHOOK: query: -- map join SELECT /*+ MAPJOIN(a) */ * FROM T1 a JOIN T2 b @@ -302,12 +364,16 @@ PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 #### A masked pattern was here #### -Operator:JOIN_4 +Operator:MAPJOIN_7 Table:default@t2 Keys:key Table:default@t1 Keys:key +Operator:MAPJOIN_7 +Table:t2 +Table:t1 + 1 11 1 1 2 12 2 1 3 13 3 1 @@ -328,6 +394,10 @@ Table:default@t1 Keys:key +Operator:JOIN_6 +Table:t2 +Table:t1 + PREHOOK: query: -- subqueries SELECT * FROM @@ -349,6 +419,10 @@ Table:default@t1 Keys:val +Operator:JOIN_8 +Table:t2 +Table:t1 + PREHOOK: query: SELECT * FROM ( @@ -367,6 +441,10 @@ Table:default@t1 Keys:val +Operator:JOIN_6 +Table:t2 +Table:t1 + PREHOOK: query: -- with 
column aliases in subqueries SELECT * FROM @@ -388,6 +466,10 @@ Table:default@t1 Keys:val +Operator:JOIN_8 +Table:t2 +Table:t1 + PREHOOK: query: -- with constants in subqueries SELECT * FROM @@ -409,6 +491,10 @@ Table:default@t1 Keys:val,key +Operator:JOIN_6 +Table:t2 +Table:t1 + PREHOOK: query: -- multiple levels of constants in subqueries SELECT * FROM @@ -433,6 +519,10 @@ Table:default@t1 Keys:key +Operator:JOIN_7 +Table:t2 +Table:t1 + PREHOOK: query: -- no mapping on functions SELECT * FROM @@ -470,6 +560,10 @@ Table:default@t1 Keys:val,key +Operator:JOIN_6 +Table:t2 +Table:t1 + PREHOOK: query: -- join followed by union SELECT * FROM @@ -505,6 +599,13 @@ Table:default@t1 Keys:val,key +Operator:GBY_14 +Table:t3 + +Operator:JOIN_6 +Table:t2 +Table:t1 + 11.0 1 12.0 1 13.0 1 @@ -539,3 +640,7 @@ Table:default@t1 Keys:val,key +Operator:JOIN_6 +Table:t2 +Table:t1 + Index: ql/src/test/results/clientpositive/table_access_keys_stats2.q.out =================================================================== --- ql/src/test/results/clientpositive/table_access_keys_stats2.q.out (revision 0) +++ ql/src/test/results/clientpositive/table_access_keys_stats2.q.out (working copy) @@ -0,0 +1,341 @@ +PREHOOK: query: -- This test is used for testing the TableAccessAnalyzer + +CREATE TABLE t1 (key STRING, val STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key) SORTED BY (key ASC) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: query: CREATE TABLE t2 (key STRING, val STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key) SORTED BY (key ASC) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket0.txt' INTO TABLE t1 PARTITION (part = '1') +PREHOOK: type: LOAD +PREHOOK: Output: default@t1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket1.txt' INTO TABLE t1 PARTITION (part = '1') +PREHOOK: type: LOAD +PREHOOK: Output: default@t1@part=1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket0.txt' INTO TABLE t1 PARTITION (part = '2') +PREHOOK: type: LOAD +PREHOOK: Output: default@t1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket1.txt' INTO TABLE t1 PARTITION (part = '2') +PREHOOK: type: LOAD +PREHOOK: Output: default@t1@part=2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket0.txt' INTO TABLE t2 PARTITION (part = '1') +PREHOOK: type: LOAD +PREHOOK: Output: default@t2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket1.txt' INTO TABLE t2 PARTITION (part = '1') +PREHOOK: type: LOAD +PREHOOK: Output: default@t2@part=1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket0.txt' INTO TABLE t2 PARTITION (part = '2') +PREHOOK: type: LOAD +PREHOOK: Output: default@t2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket1.txt' INTO TABLE t2 PARTITION (part = '2') +PREHOOK: type: LOAD +PREHOOK: Output: default@t2@part=2 +PREHOOK: query: -- Test multiple partitions are stored as inputs +SELECT count(*) FROM t1 JOIN t2 ON t1.key = t2.key AND +t1.part IS NOT NULL AND t2.part IS NOT NULL +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t1@part=1 +PREHOOK: Input: default@t1@part=2 +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t2@part=1 +PREHOOK: Input: default@t2@part=2 +#### A masked pattern was here #### +Operator:JOIN_6 +Table:default@t2 +Keys:key +Table:default@t1 +Keys:key + +Operator:JOIN_6 +Table:t2 +Partitions:part=1,part=2 +Table:t1 +Partitions:part=1,part=2 + +12096 +PREHOOK: query: -- Test bucket map join +EXPLAIN +SELECT /*+ 
MAPJOIN(t1) */ count(*) FROM t1 JOIN t2 ON t1.key = t2.key AND +t1.part IS NOT NULL AND t2.part IS NOT NULL +PREHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME t1)) (TOK_TABREF (TOK_TABNAME t2)) (AND (AND (= (. (TOK_TABLE_OR_COL t1) key) (. (TOK_TABLE_OR_COL t2) key)) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL t1) part))) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL t2) part))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST t1))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +STAGE DEPENDENCIES: + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-3 + Map Reduce Local Work + Alias -> Map Local Tables: + t1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + t1 + TableScan + alias: t1 + HashTable Sink Operator + condition expressions: + 0 + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + Position of Big Table: 1 + + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + t2 + TableScan + alias: t2 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + Position of Big Table: 1 + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: bigint + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT /*+ MAPJOIN(t1) */ count(*) FROM t1 JOIN t2 ON t1.key = t2.key AND +t1.part IS NOT NULL AND t2.part IS NOT NULL +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t1@part=1 +PREHOOK: Input: default@t1@part=2 +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t2@part=1 +PREHOOK: Input: default@t2@part=2 +#### A masked pattern was here #### +Operator:MAPJOIN_15 +Table:default@t2 +Keys:key +Table:default@t1 +Keys:key + +Operator:MAPJOIN_15 +Table:t2 +Partitions:part=1,part=2 +Table:t1 +Partitions:part=1,part=2 + +12096 +PREHOOK: query: -- Test sort merge join +EXPLAIN +SELECT /*+ MAPJOIN(t1) */ count(*) FROM t1 JOIN t2 ON t1.key = t2.key AND +t1.part IS NOT NULL AND t2.part IS NOT NULL +PREHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME t1)) (TOK_TABREF (TOK_TABNAME t2)) (AND (AND (= (. (TOK_TABLE_OR_COL t1) key) (. (TOK_TABLE_OR_COL t2) key)) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL t1) part))) (TOK_FUNCTION TOK_ISNOTNULL (. 
(TOK_TABLE_OR_COL t2) part))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST t1))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + t2 + TableScan + alias: t2 + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + Position of Big Table: 1 + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: bigint + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT /*+ MAPJOIN(t1) */ count(*) FROM t1 JOIN t2 ON t1.key = t2.key AND +t1.part IS NOT NULL AND t2.part IS NOT NULL +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t1@part=1 +PREHOOK: Input: default@t1@part=2 +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t2@part=1 +PREHOOK: Input: default@t2@part=2 +#### A masked pattern was here #### +Operator:MAPJOIN_15 +Table:default@t2 +Keys:key +Table:default@t1 +Keys:key + +Operator:MAPJOIN_15 +Table:t2 +Partitions:part=1,part=2 +Table:t1 +Partitions:part=1,part=2 + +2010 +PREHOOK: query: --Test map group by + +EXPLAIN +SELECT key, count(*) FROM t1 WHERE key < 3 GROUP BY key +PREHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME t1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 3)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + t1 + TableScan + alias: t1 + Filter Operator + predicate: + expr: (key < 3.0) + type: boolean + Select Operator + expressions: + expr: key + type: string + outputColumnNames: key + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: + expr: key + type: string + mode: final + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT key, count(*) FROM t1 WHERE key < 3 GROUP BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t1@part=1 +PREHOOK: Input: default@t1@part=2 +#### A masked pattern was here #### +Operator:GBY_3 +Table:default@t1 +Keys:key + +Operator:GBY_3 +Table:t1 +Partitions:part=1,part=2 + +0 1 +2 1 +0 4 +2 1 +1 1 
+0 1 +2 1 +0 4 +2 1 +1 1
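
Note (not part of the patch): the golden files above only show how CheckTableAccessHook prints the new data. For reviewers who want to poke at the per-operator partition information that this patch adds to TableAccessInfo from their own code, here is a minimal illustrative Java sketch. The TableAccessInfoPrinter class and its printInputs method are made-up names for illustration; the sketch assumes a TableAccessInfo has already been obtained from the query plan (as the hook does) and relies only on getOperatorToInputsMap(), Operator.getOperatorId(), Table.getTableName() and Partition.getName(), all of which are exercised in the diff.

    import java.util.List;
    import java.util.Map;

    import org.apache.hadoop.hive.ql.exec.Operator;
    import org.apache.hadoop.hive.ql.metadata.Partition;
    import org.apache.hadoop.hive.ql.metadata.Table;
    import org.apache.hadoop.hive.ql.parse.TableAccessInfo;
    import org.apache.hadoop.hive.ql.plan.OperatorDesc;

    public class TableAccessInfoPrinter {

      // Hypothetical helper for illustration only: dump, per operator, the input
      // tables and the partitions that survived pruning. A null partition list
      // means the table is unpartitioned (see getPartitions() in TableAccessAnalyzer).
      public static void printInputs(TableAccessInfo tableAccessInfo) {
        Map<Operator<? extends OperatorDesc>, Map<Table, List<Partition>>> inputs =
            tableAccessInfo.getOperatorToInputsMap();
        for (Map.Entry<Operator<? extends OperatorDesc>, Map<Table, List<Partition>>> opEntry
            : inputs.entrySet()) {
          System.out.println("Operator: " + opEntry.getKey().getOperatorId());
          for (Map.Entry<Table, List<Partition>> tableEntry : opEntry.getValue().entrySet()) {
            System.out.println("  Table: " + tableEntry.getKey().getTableName());
            List<Partition> partitions = tableEntry.getValue();
            if (partitions != null) {
              for (Partition partition : partitions) {
                System.out.println("    Partition: " + partition.getName());
              }
            }
          }
        }
      }
    }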