Index: common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
===================================================================
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java	(revision 919258)
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java	(working copy)
@@ -200,6 +200,7 @@
     HIVEOPTPPD("hive.optimize.ppd", true), // predicate pushdown
     HIVEOPTGROUPBY("hive.optimize.groupby", true), // optimize group by
     HIVEOPTBUCKETMAPJOIN("hive.optimize.bucketmapjoin", false), // optimize bucket map join
+    HIVEOPTSORTMERGEBUCKETMAPJOINTRANSFORMTRUST("hive.optimize.bucketmapjoin.transform.trust", false),
     HIVEOPTSORTMERGEBUCKETMAPJOIN("hive.optimize.bucketmapjoin.sortedmerge", false), // try to use sorted merge bucket map join
     ;
@@ -261,6 +262,7 @@
       this.defaultBoolVal = defaultBoolVal;
     }

+    @Override
     public String toString() {
       return varname;
     }
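Note: the new flag is consulted later in this patch through HiveConf.getBoolVar (see backTraceTableScanOperator in MapJoinOptimizeContext.java). A minimal sketch of reading the flag, assuming only the HiveConf accessors the patch itself uses; the standalone main() is illustrative:

import org.apache.hadoop.hive.conf.HiveConf;

public class TransformTrustFlagDemo {
  public static void main(String[] args) {
    // Illustrative configuration; in the optimizer the conf comes from ParseContext.
    HiveConf conf = new HiveConf();
    conf.setBoolVar(HiveConf.ConfVars.HIVEOPTSORTMERGEBUCKETMAPJOINTRANSFORMTRUST, true);

    // Defaults to false, i.e. script/UDF transforms are not trusted to preserve bucketing.
    boolean trustTransforms = HiveConf.getBoolVar(conf,
        HiveConf.ConfVars.HIVEOPTSORTMERGEBUCKETMAPJOINTRANSFORMTRUST);
    System.out.println("hive.optimize.bucketmapjoin.transform.trust = " + trustTransforms);
  }
}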
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/BucketMatcher.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/BucketMatcher.java	(revision 920539)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/BucketMatcher.java	(working copy)
@@ -25,9 +25,9 @@
 import org.apache.hadoop.fs.Path;

 public interface BucketMatcher {
-
+
   public List<Path> getAliasBucketFiles(String currentInputFile, String refTableAlias, String alias);
-
+
   public void setAliasBucketFileNameMapping(
       LinkedHashMap<String, LinkedHashMap<String, ArrayList<String>>> aliasBucketFileNameMapping);
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ExecMapper.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/ExecMapper.java	(revision 921994)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExecMapper.java	(working copy)
@@ -69,9 +69,9 @@
   private long nextCntr = 1;
   private String lastInputFile = null;
   private MapredLocalWork localWork = null;
-
-  private ExecMapperContext execContext = new ExecMapperContext();
-
+
+  private final ExecMapperContext execContext = new ExecMapperContext();
+
   public static class ExecMapperContext {
     boolean inputFileChanged = false;
     String currentInputFile;
@@ -124,7 +124,7 @@
       mo.setExecContext(execContext);
       mo.initializeLocalWork(jc);
       mo.initialize(jc, null);
-
+
       // initialize map local work
       localWork = mrwork.getMapLocalWork();
       if (localWork == null) {
@@ -171,7 +171,7 @@
       mo.setOutputCollector(oc);
       mo.setReporter(rp);
     }
-
+
     if (inputFileChanged()) {
       if (this.localWork != null
           && (localWork.getInputFileChangeSensitive() || this.lastInputFile == null)) {
       }
       this.lastInputFile = HiveConf.getVar(jc, HiveConf.ConfVars.HADOOPMAPFILENAME);
     }
@@ -179,7 +179,7 @@
-
+
     try {
       if (mo.getDone()) {
         done = true;
@@ -215,7 +215,7 @@
    * mapper's input file, the work need to clear context and re-initialization
    * after the input file changed. This is first introduced to process bucket
    * map join.
-   *
+   *
    * @return
    */
   private boolean inputFileChanged() {
@@ -240,12 +240,12 @@
       int fetchOpRows = 0;
       String alias = entry.getKey();
       FetchOperator fetchOp = entry.getValue();
-
+
       if (inputFileChangeSenstive) {
         fetchOp.clearFetchContext();
         setUpFetchOpContext(fetchOp, alias);
       }
-
+
       Operator<? extends Serializable> forwardOp = localWork
           .getAliasToWork().get(alias);
@@ -283,7 +283,7 @@
         }
       }
     }
-
+
   private void setUpFetchOpContext(FetchOperator fetchOp, String alias)
       throws Exception {
     String currentInputFile = HiveConf.getVar(jc,
         HiveConf.ConfVars.HADOOPMAPFILENAME);
@@ -297,7 +297,7 @@
     Iterator<Path> iter = aliasFiles.iterator();
     fetchOp.setupContext(iter, null);
   }
-
+
   private long getNextCntr(long cntr) {
     // A very simple counter to keep track of number of rows processed by the
@@ -345,6 +345,9 @@
       l4j.error("Hit error while closing operators - failing tree");
       throw new RuntimeException("Hive Runtime Error while closing operators", e);
     }
+    } finally {
+      oc = null;
+      mo.setOutputCollector(oc);
     }
   }
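The ExecMapper changes above hinge on detecting when the mapper has moved to a new input file (read from HADOOPMAPFILENAME) so bucket-join state can be reset. A self-contained sketch of that change-detection idiom, with a plain string standing in for the JobConf lookup:

public class InputFileTracker {
  private String lastInputFile = null;

  // Returns true when the mapper has moved on to a new input file.
  public boolean inputFileChanged(String currentInputFile) {
    boolean changed = (lastInputFile == null) || !lastInputFile.equals(currentInputFile);
    if (changed) {
      // A bucket map join would clear its fetch-operator context here
      // before remembering the new file.
      lastInputFile = currentInputFile;
    }
    return changed;
  }

  public static void main(String[] args) {
    InputFileTracker t = new InputFileTracker();
    System.out.println(t.inputFileChanged("/warehouse/t/bucket_0")); // true
    System.out.println(t.inputFileChanged("/warehouse/t/bucket_0")); // false
    System.out.println(t.inputFileChanged("/warehouse/t/bucket_1")); // true
  }
}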
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java	(revision 920539)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java	(working copy)
@@ -21,7 +21,6 @@
 import java.io.Serializable;
 import java.util.ArrayList;
 import java.util.HashMap;
-import java.util.List;
 import java.util.Map;

 import org.apache.commons.logging.Log;
@@ -39,10 +38,7 @@
 import org.apache.hadoop.hive.serde2.SerDe;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
-import org.apache.hadoop.hive.serde2.objectinspector.StructField;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
 import org.apache.hadoop.util.ReflectionUtils;
@@ -119,7 +115,7 @@
   transient int metadataKeyTag;
   transient int[] metadataValueTag;
   transient int maxMapJoinSize;
-
+
   public MapJoinOperator() {
   }
@@ -130,7 +126,7 @@
   @Override
   protected void initializeOp(Configuration hconf) throws HiveException {
     super.initializeOp(hconf);
-
+
     maxMapJoinSize = HiveConf.getIntVar(hconf,
         HiveConf.ConfVars.HIVEMAXMAPJOINSIZE);
@@ -317,10 +313,10 @@
     }
     super.closeOp(abort);
   }
-
+
   /**
    * Implements the getName function for the Node Interface.
-   *
+   *
    * @return the name of the operator
    */
   @Override
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java	(revision 920539)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java	(working copy)
@@ -205,7 +205,7 @@
    * optimizer and built during semantic analysis contains only key elements for
    * reduce sink and group by op
    */
-  protected transient Map<String, ExprNodeDesc> colExprMap;
+  transient Map<String, ExprNodeDesc> colExprMap;

   public void setId(String id) {
     this.id = id;
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketMapJoinOptimizer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketMapJoinOptimizer.java	(revision 919258)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketMapJoinOptimizer.java	(working copy)
@@ -36,7 +36,6 @@
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
 import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
@@ -57,17 +56,13 @@
 import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
 import org.apache.hadoop.hive.ql.parse.QBJoinTree;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
-import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
 import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;

 /**
  * this transformation does bucket map join optimization.
  */
 public class BucketMapJoinOptimizer implements Transform {
-
+
   private static final Log LOG = LogFactory.getLog(GroupByOptimizer.class
       .getName());
@@ -85,8 +80,6 @@
     opRules.put(new RuleRegExp("R2", "RS%.*MAPJOIN"),
         getBucketMapjoinRejectProc(pctx));
     opRules.put(new RuleRegExp(new String("R3"), "UNION%.*MAPJOIN%"),
         getBucketMapjoinRejectProc(pctx));
-    opRules.put(new RuleRegExp(new String("R4"), "MAPJOIN%.*MAPJOIN%"),
-        getBucketMapjoinRejectProc(pctx));

     // The dispatcher fires the processor corresponding to the closest matching
     // rule and passes the context along
@@ -130,11 +123,11 @@
       }
     };
   }
-
+
   class BucketMapjoinOptProc implements NodeProcessor {
-
+
     protected ParseContext pGraphContext;
-
+
     public BucketMapjoinOptProc(ParseContext pGraphContext) {
       super();
       this.pGraphContext = pGraphContext;
@@ -146,147 +139,68 @@
       MapJoinOperator mapJoinOp = (MapJoinOperator) nd;
       BucketMapjoinOptProcCtx context = (BucketMapjoinOptProcCtx) procCtx;

-      if (context.getListOfRejectedMapjoins().contains(mapJoinOp))
+      if (context.getListOfRejectedMapjoins().contains(mapJoinOp)) {
         return null;
-
+      }
+
       QBJoinTree joinCxt = this.pGraphContext.getMapJoinContext().get(mapJoinOp);
-      if (joinCxt == null)
+      if (joinCxt == null) {
         return null;
-
-      List<String> joinAliases = new ArrayList<String>();
-      String[] srcs = joinCxt.getBaseSrc();
-      String[] left = joinCxt.getLeftAliases();
-      List<String> mapAlias = joinCxt.getMapAliases();
-      String baseBigAlias = null;
-      for (String s : left) {
-        if (s != null && !joinAliases.contains(s)) {
-          joinAliases.add(s);
-          if (!mapAlias.contains(s)) {
-            baseBigAlias = s;
-          }
-        }
       }
-      for (String s : srcs) {
-        if (s != null && !joinAliases.contains(s)) {
-          joinAliases.add(s);
-          if (!mapAlias.contains(s)) {
-            baseBigAlias = s;
-          }
-        }
+
+      MapJoinOptimizeContext bucketMjCtx = MapJoinOptimizeContext
+          .getMapJoinOptimizeContext(this.pGraphContext, mapJoinOp);
+      if (bucketMjCtx == null) {
+        return null;
+      }
+      Map<Byte, Map<TableScanOperator, List<String>>> bucketMapJoinOpContext = bucketMjCtx
+          .getAliasToScanOpToJoinColsMapping().get(mapJoinOp);
+
+      if (bucketMapJoinOpContext == null) {
+        return null;
       }
-
-      MapJoinDesc mjDecs = mapJoinOp.getConf();
+
+      Map<TableScanOperator, Table> topToTable = this.pGraphContext.getTopToTable();
+
       LinkedHashMap<String, Integer> aliasToBucketNumberMapping = new LinkedHashMap<String, Integer>();
       LinkedHashMap<String, List<String>> aliasToBucketFileNamesMapping = new LinkedHashMap<String, List<String>>();
-      // right now this code does not work with "a join b on a.key = b.key and
-      // a.ds = b.ds", where ds is a partition column. It only works with joins
-      // with only one partition presents in each join source tables.
-      Map<String, Operator<? extends Serializable>> topOps = this.pGraphContext.getTopOps();
-      Map<TableScanOperator, Table> topToTable = this.pGraphContext.getTopToTable();
-
       // (partition to bucket file names) and (partition to bucket number) for
       // the big table;
       LinkedHashMap<Partition, List<String>> bigTblPartsToBucketFileNames = new LinkedHashMap<Partition, List<String>>();
       LinkedHashMap<Partition, Integer> bigTblPartsToBucketNumber = new LinkedHashMap<Partition, Integer>();
-
-      for (int index = 0; index < joinAliases.size(); index++) {
-        String alias = joinAliases.get(index);
-        TableScanOperator tso = (TableScanOperator) topOps.get(alias);
-        if (tso == null)
+      List<String> joinAliases = new ArrayList<String>();
+      String baseBigTableAlias = MapJoinOptimizeContext.getJoinAliases(joinCxt, joinAliases);
+      String bigTblName = null;
+      for (int tag = 0; tag < joinAliases.size(); tag++) {
+        String alias = joinAliases.get(tag);

+        Map<TableScanOperator, List<String>> columnTableScanOpMap = bucketMapJoinOpContext.get((byte) tag);
+        if (columnTableScanOpMap == null) {
           return null;
-        Table tbl = topToTable.get(tso);
-        if (tbl.isPartitioned()) {
-          PrunedPartitionList prunedParts = null;
-          try {
-            prunedParts = PartitionPruner.prune(tbl, pGraphContext.getOpToPartPruner().get(tso), pGraphContext.getConf(), alias,
-                pGraphContext.getPrunedPartitions());
-          } catch (HiveException e) {
-            // Has to use full name to make sure it does not conflict with
-            // org.apache.commons.lang.StringUtils
-            LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
-            throw new SemanticException(e.getMessage(), e);
-          }
-          int partNumber = prunedParts.getConfirmedPartns().size()
-              + prunedParts.getUnknownPartns().size();
-
-          if (partNumber > 1) {
-            // only allow one partition for small tables
-            if (alias != baseBigAlias) {
-              return null;
-            }
-            // here is the big table, and we get more than one partitions.
-            // construct a mapping of (Partition->bucket file names) and
-            // (Partition -> bucket number)
-            Iterator<Partition> iter = prunedParts.getConfirmedPartns()
-                .iterator();
-            while (iter.hasNext()) {
-              Partition p = iter.next();
-              if (!checkBucketColumns(p.getBucketCols(), mjDecs, index)) {
-                return null;
-              }
-              List<String> fileNames = getOnePartitionBucketFileNames(p);
-              bigTblPartsToBucketFileNames.put(p, fileNames);
-              bigTblPartsToBucketNumber.put(p, p.getBucketCount());
-            }
-            iter = prunedParts.getUnknownPartns().iterator();
-            while (iter.hasNext()) {
-              Partition p = iter.next();
-              if (!checkBucketColumns(p.getBucketCols(), mjDecs, index)) {
-                return null;
-              }
-              List<String> fileNames = getOnePartitionBucketFileNames(p);
-              bigTblPartsToBucketFileNames.put(p, fileNames);
-              bigTblPartsToBucketNumber.put(p, p.getBucketCount());
-            }
-            // If there are more than one partition for the big
-            // table, aliasToBucketFileNamesMapping and partsToBucketNumber will
-            // not contain mappings for the big table. Instead, the mappings are
-            // contained in bigTblPartsToBucketFileNames and
-            // bigTblPartsToBucketNumber
-
-          } else {
-            Partition part = null;
-            Iterator<Partition> iter = prunedParts.getConfirmedPartns()
-                .iterator();
-            part = iter.next();
-            if (part == null) {
-              iter = prunedParts.getUnknownPartns().iterator();
-              part = iter.next();
-            }
-            assert part != null;
-            Integer num = new Integer(part.getBucketCount());
-            aliasToBucketNumberMapping.put(alias, num);
-            if (!checkBucketColumns(part.getBucketCols(), mjDecs, index)) {
-              return null;
-            }
-            List<String> fileNames = getOnePartitionBucketFileNames(part);
-            aliasToBucketFileNamesMapping.put(alias, fileNames);
-            if (alias == baseBigAlias) {
-              bigTblPartsToBucketFileNames.put(part, fileNames);
-              bigTblPartsToBucketNumber.put(part, num);
-            }
-          }
-        } else {
-          if (!checkBucketColumns(tbl.getBucketCols(), mjDecs, index))
-            return null;
-          Integer num = new Integer(tbl.getNumBuckets());
-          aliasToBucketNumberMapping.put(alias, num);
-          List<String> fileNames = new ArrayList<String>();
-          try {
-            FileSystem fs = FileSystem.get(this.pGraphContext.getConf());
-            FileStatus[] files = fs.listStatus(new Path(tbl.getDataLocation().toString()));
-            if (files != null) {
-              for (FileStatus file : files) {
-                fileNames.add(file.getPath().toString());
-              }
-            }
-          } catch (IOException e) {
-            throw new SemanticException(e);
+        }
+
+        boolean succeed = true;
+        for (Map.Entry<TableScanOperator, List<String>> entry : columnTableScanOpMap.entrySet()) {
+          TableScanOperator topScanOp = (TableScanOperator) entry.getKey();
+          List<String> usedJoinCols = entry.getValue();
+          Table tbl = this.pGraphContext.getTopToTable().get(topScanOp);
+          boolean isBigTbl = (alias == baseBigTableAlias) || alias.equals(baseBigTableAlias);
+
+          String tsoAlias = MapJoinOptimizeContext.reverseLookupTableScanMap(this.pGraphContext, topScanOp);
+
+          if (isBigTbl) {
+            bigTblName = tsoAlias;
           }
-          aliasToBucketFileNamesMapping.put(alias, fileNames);
+          succeed = succeed && getOneTableBucketFileNameMapping(mapJoinOp,
+              aliasToBucketNumberMapping,
+              aliasToBucketFileNamesMapping, topToTable,
+              bigTblPartsToBucketFileNames, bigTblPartsToBucketNumber, tag,
+              tbl, tsoAlias, usedJoinCols, topScanOp, isBigTbl);
+        }
+        if (!succeed) {
+          return null;
         }
       }
-
+
       // All tables or partitions are bucketed, and their bucket number is
       // stored in 'bucketNumbers', we need to check if the number of buckets in
       // the big table can be divided by no of buckets in small tables.
@@ -301,38 +215,39 @@
           }
         }
       } else {
-        int bucketNoInBigTbl = aliasToBucketNumberMapping.get(baseBigAlias).intValue();
+        int bucketNoInBigTbl = aliasToBucketNumberMapping.get(bigTblName).intValue();
         if (!checkBucketNumberAgainstBigTable(aliasToBucketNumberMapping, bucketNoInBigTbl)) {
           return null;
         }
       }
-
       MapJoinDesc desc = mapJoinOp.getConf();
-
-      LinkedHashMap<String, LinkedHashMap<String, ArrayList<String>>> aliasBucketFileNameMapping =
+
+      LinkedHashMap<String, LinkedHashMap<String, ArrayList<String>>> finalAliasBucketFileNameMapping =
           new LinkedHashMap<String, LinkedHashMap<String, ArrayList<String>>>();
-
-      // sort bucket names for the big table
+
+      // sort bucket names for the big table
       if (bigTblPartsToBucketNumber.size() > 0) {
        Collection<List<String>> bucketNamesAllParts = bigTblPartsToBucketFileNames.values();
        for (List<String> partBucketNames : bucketNamesAllParts) {
          Collections.sort(partBucketNames);
        }
      } else {
-        Collections.sort(aliasToBucketFileNamesMapping.get(baseBigAlias));
+        Collections.sort(aliasToBucketFileNamesMapping.get(bigTblName));
      }
-
      // go through all small tables and get the mapping from bucket file name
-      // in the big table to bucket file names in small tables.
-      for (int j = 0; j < joinAliases.size(); j++) {
-        String alias = joinAliases.get(j);
-        if (alias.equals(baseBigAlias))
+      // in the big table to bucket file names in small tables.
+      for (Map.Entry<String, List<String>> tblNameMapping : aliasToBucketFileNamesMapping.entrySet()) {
+        String alias = tblNameMapping.getKey();
+        if (alias.equals(bigTblName)) {
          continue;
+        }
        Collections.sort(aliasToBucketFileNamesMapping.get(alias));
        LinkedHashMap<String, ArrayList<String>> mapping = new LinkedHashMap<String, ArrayList<String>>();
-        aliasBucketFileNameMapping.put(alias, mapping);
-
+        finalAliasBucketFileNameMapping.put(alias, mapping);
+
        // for each bucket file in big table, get the corresponding bucket file
        // name in the small table.
        if (bigTblPartsToBucketNumber.size() > 0) {
@@ -345,23 +260,131 @@
            assert bigTblPartToBucketNum.hasNext();
            int bigTblBucketNum = bigTblPartToBucketNum.next().getValue().intValue();
            List<String> bigTblBucketNameList = bigTblPartToBucketNames.next().getValue();
-            fillMapping(baseBigAlias, aliasToBucketNumberMapping,
+            fillMapping(bigTblName, aliasToBucketNumberMapping,
                aliasToBucketFileNamesMapping, alias, mapping, bigTblBucketNum,
                bigTblBucketNameList);
          }
        } else {
-          List<String> bigTblBucketNameList = aliasToBucketFileNamesMapping.get(baseBigAlias);
-          int bigTblBucketNum = aliasToBucketNumberMapping.get(baseBigAlias);
-          fillMapping(baseBigAlias, aliasToBucketNumberMapping,
+          List<String> bigTblBucketNameList = aliasToBucketFileNamesMapping.get(bigTblName);
+          int bigTblBucketNum = aliasToBucketNumberMapping.get(bigTblName);
+          fillMapping(bigTblName, aliasToBucketNumberMapping,
              aliasToBucketFileNamesMapping, alias, mapping, bigTblBucketNum,
              bigTblBucketNameList);
        }
      }
-      desc.setAliasBucketFileNameMapping(aliasBucketFileNameMapping);
-      desc.setBigTableAlias(baseBigAlias);
+      desc.setAliasBucketFileNameMapping(finalAliasBucketFileNameMapping);
+      desc.setBigTableAlias(bigTblName);
      return null;
    }

+    private boolean getOneTableBucketFileNameMapping(MapJoinOperator mapJoinOp,
+        LinkedHashMap<String, Integer> aliasToBucketNumberMapping,
+        LinkedHashMap<String, List<String>> aliasToBucketFileNamesMapping,
+        Map<TableScanOperator, Table> topToTable,
+        LinkedHashMap<Partition, List<String>> bigTblPartsToBucketFileNames,
+        LinkedHashMap<Partition, Integer> bigTblPartsToBucketNumber, int index,
+        Table tbl, String tsoAlias, List<String> usedJoinCols, TableScanOperator tso, boolean bigTbl) throws SemanticException {
+      if (tbl == null) {
+        return false;
+      }
+
+      String alias = tsoAlias;
+
+      if (tbl.isPartitioned()) {
+        PrunedPartitionList prunedParts = null;
+        try {
+          prunedParts = PartitionPruner.prune(tbl, pGraphContext.getOpToPartPruner().get(tso), pGraphContext.getConf(), alias,
+              pGraphContext.getPrunedPartitions());
+        } catch (HiveException e) {
+          // Has to use full name to make sure it does not conflict with
+          // org.apache.commons.lang.StringUtils
+          LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
+          throw new SemanticException(e.getMessage(), e);
+        }
+        int partNumber = prunedParts.getConfirmedPartns().size()
+            + prunedParts.getUnknownPartns().size();
+
+        if (partNumber > 1) {
+          // only allow one partition for small tables
+          if (!bigTbl) {
+            return false;
+          }
+          // here is the big table, and we got more than one partition.
+          // construct a mapping of (Partition->bucket file names) and
+          // (Partition -> bucket number)
+          Iterator<Partition> iter = prunedParts.getConfirmedPartns()
+              .iterator();
+          while (iter.hasNext()) {
+            Partition p = iter.next();
+            if (!checkBucketColumns(p.getBucketCols(), usedJoinCols)) {
+              return false;
+            }
+            List<String> fileNames = getOnePartitionBucketFileNames(p);
+            bigTblPartsToBucketFileNames.put(p, fileNames);
+            bigTblPartsToBucketNumber.put(p, p.getBucketCount());
+          }
+          iter = prunedParts.getUnknownPartns().iterator();
+          while (iter.hasNext()) {
+            Partition p = iter.next();
+            if (!checkBucketColumns(p.getBucketCols(), usedJoinCols)) {
+              return false;
+            }
+            List<String> fileNames = getOnePartitionBucketFileNames(p);
+            bigTblPartsToBucketFileNames.put(p, fileNames);
+            bigTblPartsToBucketNumber.put(p, p.getBucketCount());
+          }
+          // If there are more than one partition for the big
+          // table, aliasToBucketFileNamesMapping and partsToBucketNumber will
+          // not contain mappings for the big table. Instead, the mappings are
+          // contained in bigTblPartsToBucketFileNames and
+          // bigTblPartsToBucketNumber
+
+        } else {
+          Partition part = null;
+          Iterator<Partition> iter = prunedParts.getConfirmedPartns()
+              .iterator();
+          part = iter.next();
+          if (part == null) {
+            iter = prunedParts.getUnknownPartns().iterator();
+            part = iter.next();
+          }
+          assert part != null;
+          Integer num = new Integer(part.getBucketCount());
+          aliasToBucketNumberMapping.put(alias, num);
+          if (!checkBucketColumns(part.getBucketCols(), usedJoinCols)) {
+            return false;
+          }
+          List<String> fileNames = getOnePartitionBucketFileNames(part);
+          aliasToBucketFileNamesMapping.put(alias, fileNames);
+          if (bigTbl) {
+            bigTblPartsToBucketFileNames.put(part, fileNames);
+            bigTblPartsToBucketNumber.put(part, num);
+          }
+        }
+      } else {
+        if (!checkBucketColumns(tbl.getBucketCols(), usedJoinCols)) {
+          return false;
+        }
+        Integer num = new Integer(tbl.getNumBuckets());
+        aliasToBucketNumberMapping.put(alias, num);
+        List<String> fileNames = new ArrayList<String>();
+        try {
+          FileSystem fs = FileSystem.get(this.pGraphContext.getConf());
+          FileStatus[] files = fs.listStatus(new Path(tbl.getDataLocation().toString()));
+          if (files != null) {
+            for (FileStatus file : files) {
+              fileNames.add(file.getPath().toString());
+            }
+          }
+        } catch (IOException e) {
+          throw new SemanticException(e);
+        }
+        aliasToBucketFileNamesMapping.put(alias, fileNames);
+      }
+
+      return true;
+    }
+
    private void fillMapping(String baseBigAlias,
        LinkedHashMap<String, Integer> aliasToBucketNumberMapping,
        LinkedHashMap<String, List<String>> aliasToBucketFileNamesMapping,
@@ -400,8 +423,9 @@
        int nxt = iter.next().intValue();
        boolean ok = (nxt >= bucketNumberInPart) ? nxt % bucketNumberInPart == 0
            : bucketNumberInPart % nxt == 0;
-        if (!ok)
+        if (!ok) {
          return false;
+        }
      }
      return true;
    }
@@ -423,57 +447,37 @@
      }
      return fileNames;
    }
-
-    private boolean checkBucketColumns(List<String> bucketColumns, MapJoinDesc mjDesc, int index) {
-      List<ExprNodeDesc> keys = mjDesc.getKeys().get((byte) index);
-      if (keys == null || bucketColumns == null || bucketColumns.size() == 0)
-        return false;
-
-      // get all join columns from join keys stored in MapJoinDesc
-      List<String> joinCols = new ArrayList<String>();
-      List<ExprNodeDesc> joinKeys = new ArrayList<ExprNodeDesc>();
-      joinKeys.addAll(keys);
-      while (joinKeys.size() > 0) {
-        ExprNodeDesc node = joinKeys.remove(0);
-        if (node instanceof ExprNodeColumnDesc) {
-          joinCols.addAll(node.getCols());
-        } else if (node instanceof ExprNodeGenericFuncDesc) {
-          ExprNodeGenericFuncDesc udfNode = ((ExprNodeGenericFuncDesc) node);
-          GenericUDF udf = udfNode.getGenericUDF();
-          if (!FunctionRegistry.isDeterministic(udf)) {
-            return false;
-          }
-          joinKeys.addAll(0, udfNode.getChildExprs());
-        } else {
-          return false;
-        }
-      }
+    private boolean checkBucketColumns(List<String> bucketColumns, List<String> usedJoinCols) {
      // to see if the join columns from a table is exactly this same as its
-      // bucket columns
+      // bucket columns
      if (usedJoinCols.size() == 0 || usedJoinCols.size() != bucketColumns.size()) {
        return false;
      }
-
-      for (String col : joinCols) {
-        if (!bucketColumns.contains(col))
-          return false;
+
+      for (String col : usedJoinCols) {
+        boolean found = false;
+        for (String c : bucketColumns) {
+          if (c == col || c.equals(col)) {
+            found = true;
+            break;
+          }
+        }
+        if (!found) {
+          return false;
+        }
      }
-
      return true;
    }
-  }
-
+  }
+
  class BucketMapjoinOptProcCtx implements NodeProcessorCtx {
    // we only convert map joins that follows a root table scan in the same
    // mapper. That means there is no reducer between the root table scan and
    // mapjoin.
    Set<MapJoinOperator> listOfRejectedMapjoins = new HashSet<MapJoinOperator>();
-
+
    public Set<MapJoinOperator> getListOfRejectedMapjoins() {
      return listOfRejectedMapjoins;
    }
-
  }
}
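The bucket-count rule enforced by checkBucketNumberAgainstBigTable above is that each small table's bucket count must divide, or be divisible by, the big table's. A standalone restatement of that test (names here are illustrative, not from the patch):

import java.util.Arrays;
import java.util.List;

public class BucketNumberCheck {
  // Mirrors the divisibility test: each count must divide or be a multiple of the big table's.
  static boolean compatible(int bigTableBuckets, List<Integer> smallTableBuckets) {
    for (int n : smallTableBuckets) {
      boolean ok = (n >= bigTableBuckets) ? n % bigTableBuckets == 0
                                          : bigTableBuckets % n == 0;
      if (!ok) {
        return false;
      }
    }
    return true;
  }

  public static void main(String[] args) {
    System.out.println(compatible(4, Arrays.asList(2, 8))); // true: 4 % 2 == 0 and 8 % 4 == 0
    System.out.println(compatible(4, Arrays.asList(3)));    // false: no divisibility either way
  }
}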
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java	(revision 919258)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java	(working copy)
@@ -35,7 +35,6 @@
 import org.apache.hadoop.hive.ql.Context;
 import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator;
 import org.apache.hadoop.hive.ql.exec.JoinOperator;
-import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.OperatorFactory;
 import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
@@ -62,7 +61,6 @@
 import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
 import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
 import org.apache.hadoop.hive.ql.plan.MapredLocalWork;
-import org.apache.hadoop.hive.ql.plan.MapredLocalWork.BucketMapJoinContext;
 import org.apache.hadoop.hive.ql.plan.MapredWork;
 import org.apache.hadoop.hive.ql.plan.PartitionDesc;
 import org.apache.hadoop.hive.ql.plan.PlanUtils;
@@ -70,6 +68,7 @@
 import org.apache.hadoop.hive.ql.plan.TableDesc;
 import org.apache.hadoop.hive.ql.plan.TableScanDesc;
 import org.apache.hadoop.hive.ql.plan.FilterDesc.sampleDesc;
+import org.apache.hadoop.hive.ql.plan.MapredLocalWork.BucketMapJoinContext;

 /**
  * General utility common functions for the Processor to convert operator into
@@ -84,7 +83,7 @@

   /**
    * Initialize the current plan by adding it to root tasks.
-   *
+   *
    * @param op
    *          the reduce sink operator encountered
    * @param opProcCtx
@@ -130,9 +129,15 @@
     opProcCtx.setCurrAliasId(currAliasId);
   }

+  public static void initMapJoinPlan(
+      Operator<? extends Serializable> op, GenMRProcContext ctx,
+      boolean readInputMapJoin, boolean readInputUnion, boolean setReducer, int pos) throws SemanticException {
+    initMapJoinPlan(op, ctx, readInputMapJoin, readInputUnion, setReducer, pos, false);
+  }
+
   /**
    * Initialize the current plan by adding it to root tasks.
-   *
+   *
    * @param op
    *          the map join operator encountered
    * @param opProcCtx
@@ -142,7 +147,7 @@
    */
   public static void initMapJoinPlan(Operator<? extends Serializable> op,
       GenMRProcContext opProcCtx, boolean readInputMapJoin,
-      boolean readInputUnion, boolean setReducer, int pos)
+      boolean readInputUnion, boolean setReducer, int pos, boolean createLocalPlan)
       throws SemanticException {
     Map<Operator<? extends Serializable>, GenMapRedCtx> mapCurrCtx = opProcCtx
         .getMapCurrCtx();
@@ -159,7 +164,7 @@
     // The mapjoin has already been encountered. Some context must be stored
     // about that
     if (readInputMapJoin) {
-      AbstractMapJoinOperator<? extends MapJoinDesc> currMapJoinOp = (AbstractMapJoinOperator<? extends MapJoinDesc>) opProcCtx.getCurrMapJoinOp();
+      AbstractMapJoinOperator<? extends MapJoinDesc> currMapJoinOp = opProcCtx.getCurrMapJoinOp();
       assert currMapJoinOp != null;
       boolean local = ((pos == -1) || (pos == (currMapJoinOp.getConf())
           .getPosBigTable())) ? false : true;
@@ -197,7 +202,7 @@
         }
         setTaskPlan(taskTmpDir, taskTmpDir, rootOp, plan, local, tt_desc);
-        setupBucketMapJoinInfo(plan, currMapJoinOp);
+        setupBucketMapJoinInfo(plan, currMapJoinOp, createLocalPlan);
       } else {
         initUnionPlan(opProcCtx, currTask, false);
       }
@@ -219,7 +224,7 @@
       seenOps.add(currTopOp);
       boolean local = (pos == desc.getPosBigTable()) ? false : true;
       setTaskPlan(currAliasId, currTopOp, plan, local, opProcCtx);
-      setupBucketMapJoinInfo(plan, (AbstractMapJoinOperator<? extends MapJoinDesc>) op);
+      setupBucketMapJoinInfo(plan, (AbstractMapJoinOperator<? extends MapJoinDesc>) op, createLocalPlan);
     }

     opProcCtx.setCurrTask(currTask);
@@ -228,9 +233,9 @@
   private static void setupBucketMapJoinInfo(MapredWork plan,
-      AbstractMapJoinOperator<? extends MapJoinDesc> currMapJoinOp) {
+      AbstractMapJoinOperator<? extends MapJoinDesc> currMapJoinOp, boolean createLocalPlan) {
     if (currMapJoinOp != null) {
-      LinkedHashMap<String, LinkedHashMap<String, ArrayList<String>>> aliasBucketFileNameMapping =
+      LinkedHashMap<String, LinkedHashMap<String, ArrayList<String>>> aliasBucketFileNameMapping =
           currMapJoinOp.getConf().getAliasBucketFileNameMapping();
       if (aliasBucketFileNameMapping != null) {
         MapredLocalWork localPlan = plan.getMapLocalWork();
         if (currMapJoinOp instanceof SMBMapJoinOperator) {
           localPlan = ((SMBMapJoinOperator) currMapJoinOp).getConf().getLocalWork();
         }
-        if (localPlan == null) {
+        if (localPlan == null && createLocalPlan) {
           localPlan = new MapredLocalWork(
               new LinkedHashMap<String, Operator<? extends Serializable>>(),
               new LinkedHashMap<String, FetchWork>());
@@ -253,6 +258,11 @@
           }
         }
       }
+
+      if (localPlan == null) {
+        return;
+      }
+
       if (currMapJoinOp instanceof SMBMapJoinOperator) {
         plan.setMapLocalWork(null);
         ((SMBMapJoinOperator) currMapJoinOp).getConf().setLocalWork(localPlan);
@@ -271,7 +281,7 @@

   /**
    * Initialize the current union plan.
-   *
+   *
    * @param op
    *          the reduce sink operator encountered
    * @param opProcCtx
@@ -333,9 +343,17 @@
     }
   }

+
+  public static void joinPlan(Operator<? extends Serializable> op,
+      Task<? extends Serializable> oldTask, Task<? extends Serializable> task,
+      GenMRProcContext opProcCtx, int pos, boolean split,
+      boolean readMapJoinData, boolean readUnionData) throws SemanticException {
+    joinPlan(op, oldTask, task, opProcCtx, pos, split, readMapJoinData, readUnionData, false);
+  }
+
   /**
    * Merge the current task with the task for the current reducer.
-   *
+   *
    * @param op
    *          operator being processed
    * @param oldTask
@@ -350,7 +368,7 @@
   public static void joinPlan(Operator<? extends Serializable> op,
       Task<? extends Serializable> oldTask, Task<? extends Serializable> task,
       GenMRProcContext opProcCtx, int pos, boolean split,
-      boolean readMapJoinData, boolean readUnionData) throws SemanticException {
+      boolean readMapJoinData, boolean readUnionData, boolean createLocalWork) throws SemanticException {
     Task<? extends Serializable> currTask = task;
     MapredWork plan = (MapredWork) currTask.getWork();
     Operator<? extends Serializable> currTopOp = opProcCtx.getCurrTopOp();
@@ -386,13 +404,13 @@
       }
       setTaskPlan(currAliasId, currTopOp, plan, local, opProcCtx);
       if (op instanceof AbstractMapJoinOperator) {
-        setupBucketMapJoinInfo(plan, (AbstractMapJoinOperator<? extends MapJoinDesc>) op);
+        setupBucketMapJoinInfo(plan, (AbstractMapJoinOperator<? extends MapJoinDesc>) op, createLocalWork);
       }
     }
     currTopOp = null;
     opProcCtx.setCurrTopOp(currTopOp);
   } else if (opProcCtx.getCurrMapJoinOp() != null) {
-    AbstractMapJoinOperator<? extends MapJoinDesc> mjOp = (AbstractMapJoinOperator<? extends MapJoinDesc>) opProcCtx.getCurrMapJoinOp();
+    AbstractMapJoinOperator<? extends MapJoinDesc> mjOp = opProcCtx.getCurrMapJoinOp();
     if (readUnionData) {
       initUnionPlan(opProcCtx, currTask, false);
     } else {
@@ -400,7 +418,7 @@

       // In case of map-join followed by map-join, the file needs to be
       // obtained from the old map join
-      AbstractMapJoinOperator<? extends MapJoinDesc> oldMapJoin = (AbstractMapJoinOperator<? extends MapJoinDesc>) mjCtx.getOldMapJoin();
+      AbstractMapJoinOperator<? extends MapJoinDesc> oldMapJoin = mjCtx.getOldMapJoin();
       String taskTmpDir = null;
       TableDesc tt_desc = null;
       Operator<? extends Serializable> rootOp = null;
@@ -420,19 +438,22 @@
       boolean local = ((pos == -1) || (pos == (mjOp.getConf())
           .getPosBigTable())) ? false : true;
       setTaskPlan(taskTmpDir, taskTmpDir, rootOp, plan, local, tt_desc);
-      setupBucketMapJoinInfo(plan, oldMapJoin);
+      setupBucketMapJoinInfo(plan, oldMapJoin, createLocalWork);
     }
     opProcCtx.setCurrMapJoinOp(null);

     if ((oldTask != null) && (parTasks != null)) {
       for (Task<? extends Serializable> parTask : parTasks) {
         parTask.addDependentTask(currTask);
+        if (opProcCtx.getRootTasks().contains(currTask)) {
+          opProcCtx.getRootTasks().remove(currTask);
+          if (!opProcCtx.getRootTasks().contains(parTask)) {
+            opProcCtx.getRootTasks().add(parTask);
+          }
+        }
       }
     }
-
-    if (opProcCtx.getRootTasks().contains(currTask)) {
-      opProcCtx.getRootTasks().remove(currTask);
-    }
   }

   opProcCtx.setCurrTask(currTask);
@@ -440,7 +461,7 @@

   /**
    * Split the current plan by creating a temporary destination.
-   *
+   *
    * @param op
    *          the reduce sink operator encountered
    * @param opProcCtx
@@ -472,7 +493,7 @@

   /**
    * set the current task in the mapredWork.
-   *
+   *
    * @param alias_id
    *          current alias
    * @param topOp
@@ -631,7 +652,7 @@

   /**
    * set the current task in the mapredWork.
-   *
+   *
    * @param alias
    *          current alias
    * @param topOp
@@ -647,6 +668,10 @@
       Operator<? extends Serializable> topOp, MapredWork plan, boolean local,
       TableDesc tt_desc) throws SemanticException {

+    if (path == null || alias == null) {
+      return;
+    }
+
     if (!local) {
       if (plan.getPathToAliases().get(path) == null) {
         plan.getPathToAliases().put(path, new ArrayList<String>());
@@ -673,7 +698,7 @@

   /**
    * set key and value descriptor.
-   *
+   *
    * @param plan
    *          current plan
    * @param topOp
@@ -707,7 +732,7 @@

   /**
    * create a new plan and return.
-   *
+   *
    * @return the new plan
    */
   public static MapredWork getMapRedWork() {
@@ -723,7 +748,7 @@

   /**
    * insert in the map for the operator to row resolver.
-   *
+   *
    * @param op
    *          operator created
    * @param rr
@@ -857,7 +882,7 @@
       opProcCtx.setMapJoinCtx(mjOp, mjCtx);
       opProcCtx.getMapCurrCtx().put(parent,
           new GenMapRedCtx(childTask, null, null));
-      setupBucketMapJoinInfo(cplan, mjOp);
+      setupBucketMapJoinInfo(cplan, mjOp, false);
     }

     currTopOp = null;
@@ -937,4 +962,5 @@
   private GenMapRedUtils() {
     // prevent instantiation
   }
+
 }
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinFactory.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinFactory.java	(revision 919258)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinFactory.java	(working copy)
@@ -100,12 +100,12 @@
       // If the plan for this reducer does not exist, initialize the plan
       if (opMapTask == null) {
         assert currPlan.getReducer() == null;
-        GenMapRedUtils.initMapJoinPlan(mapJoin, ctx, false, false, false, pos);
+        GenMapRedUtils.initMapJoinPlan(mapJoin, ctx, false, false, false, pos, true);
       } else {
         // The current plan can be thrown away after being merged with the
         // original plan
         GenMapRedUtils.joinPlan(mapJoin, null, opMapTask, ctx, pos, false,
-            false, false);
+            false, false, true);
         currTask = opMapTask;
         ctx.setCurrTask(currTask);
       }
@@ -204,7 +204,6 @@
       }

       ctx.setCurrMapJoinOp(mapJoin);
-      Task currTask = ctx.getCurrTask();

       GenMRMapJoinCtx mjCtx = ctx.getMapJoinCtx(mapJoin);
       if (mjCtx == null) {
@@ -270,7 +269,6 @@
           ctx.getParseCtx();

       AbstractMapJoinOperator<? extends MapJoinDesc> oldMapJoin = ctx.getCurrMapJoinOp();
-      assert oldMapJoin != null;
       GenMRMapJoinCtx mjCtx = ctx.getMapJoinCtx(mapJoin);
       if (mjCtx != null) {
         mjCtx.setOldMapJoin(oldMapJoin);
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinOptimizeContext.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinOptimizeContext.java	(revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinOptimizeContext.java	(revision 0)
@@ -0,0 +1,327 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
+import org.apache.hadoop.hive.ql.exec.GroupByOperator;
+import org.apache.hadoop.hive.ql.exec.JoinOperator;
+import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator;
+import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.ScriptOperator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.exec.UnionOperator;
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.QBJoinTree;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+
+/**
+ * MapJoinOptimizeContext helps to track where the join keys of one map join
+ * operator originally come from.
+ *
+ * Take the query below as an example:
+ * select /+mapjoin(a)+/ a.k1 as k1, a.value as v1, b.value as v2 from
+ * (select key as k1, value as value from srcbucket_mapjoin) a
+ * join
+ * srcbucket_mapjoin_part b on a.k1 = b.key.
+ *
+ * There are two join keys (a.k1 and b.key) in this map join operator.
+ * MapJoinOptimizeContext will find out that a.k1 actually comes from the top
+ * table scan operator for a:srcbucket_mapjoin and uses its column 'key', and
+ * that b.key comes from the table scan operator for srcbucket_mapjoin_part and
+ * uses its column 'key'. The mapping will be:
+ * MapJoinOperator --> {
+ *   {0: {"TSO for a:srcbucket_mapjoin": [key]}},
+ *   {1: {"TSO for srcbucket_mapjoin_part": [key]}}
+ * }
+ * , where 0 and 1 are join alias tags.
+ *
+ */
+public class MapJoinOptimizeContext {
+
+  Map<MapJoinOperator, Map<Byte, Map<TableScanOperator, List<String>>>> aliasToScanOpToJoinColsMapping;
+
+  public Map<MapJoinOperator, Map<Byte, Map<TableScanOperator, List<String>>>> getAliasToScanOpToJoinColsMapping() {
+    return aliasToScanOpToJoinColsMapping;
+  }
+
+  public void setAliasToScanOpToJoinColsMapping(
+      Map<MapJoinOperator, Map<Byte, Map<TableScanOperator, List<String>>>> aliasToScanOpToJoinColsMapping) {
+    this.aliasToScanOpToJoinColsMapping = aliasToScanOpToJoinColsMapping;
+  }
+
+  @SuppressWarnings("unchecked")
+  public static MapJoinOptimizeContext getMapJoinOptimizeContext(
+      ParseContext pGraphContext, MapJoinOperator mapJoinOp) {
+    MapJoinOptimizeContext bucketMJOptCxt = pGraphContext.getBucketOptimizerContext();
+    if (bucketMJOptCxt == null) {
+      bucketMJOptCxt = new MapJoinOptimizeContext();
+      pGraphContext.setBucketOptimizerContext(bucketMJOptCxt);
+    }
+
+    Map<MapJoinOperator, Map<Byte, Map<TableScanOperator, List<String>>>> bucketMapJoinOpJoinColOriginateMap = bucketMJOptCxt.getAliasToScanOpToJoinColsMapping();
+    if (bucketMapJoinOpJoinColOriginateMap != null
+        && bucketMapJoinOpJoinColOriginateMap.get(mapJoinOp) != null) {
+      return bucketMJOptCxt;
+    }
+
+    if (bucketMapJoinOpJoinColOriginateMap == null) {
+      bucketMapJoinOpJoinColOriginateMap = new HashMap<MapJoinOperator, Map<Byte, Map<TableScanOperator, List<String>>>>();
+      bucketMJOptCxt.setAliasToScanOpToJoinColsMapping(bucketMapJoinOpJoinColOriginateMap);
+    }
+
+    Map<Byte, Map<TableScanOperator, List<String>>> mapJoinOpMap = bucketMapJoinOpJoinColOriginateMap.get(mapJoinOp);
+    if (mapJoinOpMap == null) {
+      mapJoinOpMap = new HashMap<Byte, Map<TableScanOperator, List<String>>>();
+    }
+
+    Map<String, Operator<? extends Serializable>> topOps = pGraphContext.getJoinOpToAliasToOpMapping().get(mapJoinOp);
+
+    QBJoinTree joinCxt = pGraphContext.getMapJoinContext().get(mapJoinOp);
+
+    List<String> joinAliases = new ArrayList<String>();
+    String baseBigAlias = getJoinAliases(joinCxt, joinAliases);
+
+    MapJoinDesc mjDesc = mapJoinOp.getConf();
+
+    Map<String, TableScanOperator> topOpMapping = new HashMap<String, TableScanOperator>();
+    for (int index = 0; index < joinAliases.size(); index++) {
+      String alias = joinAliases.get(index);
+      Operator<? extends Serializable> topOp = topOps.get(alias);
+
+      TableScanOperator tso = backTraceTableScanOperator(topOp, pGraphContext);
+
+      if (tso == null) {
+        return null;
+      }
+      // get all join columns from join keys stored in MapJoinDesc
+      List<ExprNodeDesc> keys = mjDesc.getKeys().get((byte) index);
+
+      if (keys == null) {
+        continue;
+      }
+
+      List<String> joinCols = new ArrayList<String>();
+      List<ExprNodeDesc> joinKeys = new ArrayList<ExprNodeDesc>();
+      joinKeys.addAll(keys);
+      while (joinKeys.size() > 0) {
+        ExprNodeDesc node = joinKeys.remove(0);
+        if (node instanceof ExprNodeColumnDesc) {
+          joinCols.addAll(node.getCols());
+        } else if (node instanceof ExprNodeGenericFuncDesc) {
+          ExprNodeGenericFuncDesc udfNode = ((ExprNodeGenericFuncDesc) node);
+          GenericUDF udf = udfNode.getGenericUDF();
+          if (!FunctionRegistry.isDeterministic(udf)) {
+            return null;
+          }
+          joinKeys.addAll(0, udfNode.getChildExprs());
+        } else {
+          return null;
+        }
+      }
+
+      Map<TableScanOperator, List<String>> columnTableScanOpMap = new HashMap<TableScanOperator, List<String>>();
+      boolean valid = backTraceSourceTableColumns(columnTableScanOpMap, joinCols, topOp);
+      if (!valid) {
+        return null;
+      }
+      topOpMapping.put(alias, tso);
+      mapJoinOpMap.put((byte) index, columnTableScanOpMap);
+    }
+    bucketMapJoinOpJoinColOriginateMap.put(mapJoinOp, mapJoinOpMap);
+    return bucketMJOptCxt;
+  }
+ * + * @param joinCols + * @param parentOp + * @return + */ + @SuppressWarnings("unchecked") + public static boolean backTraceSourceTableColumns( + Map, List> retMap, + List joinCols, Operator parentOp) { + List preCols = new ArrayList(); + List currCols = new ArrayList(joinCols); + Operator current = parentOp; + boolean valid = true; + //back trace the operator tree, only handle select operator and map join operator + while(current != null) { + preCols.clear(); + if (current instanceof AbstractMapJoinOperator) { + AbstractMapJoinOperator amj = (AbstractMapJoinOperator) current; + MapJoinDesc desc = amj.getConf(); + Map> joinValues = desc.getExprs(); + java.util.ArrayList outputNames = desc.getOutputColumnNames(); + for (String col : currCols) { + int index = outputNames.indexOf(col); + ExprNodeDesc exprNode = null; + int tag = 0; + for (; tag< joinValues.size(); tag++) { + List currJoinValues = joinValues.get((byte)tag); + if(index < currJoinValues.size()) { + exprNode = currJoinValues.get(index); + break; + } else { + index = index - currJoinValues.size(); + } + } + + if (exprNode instanceof ExprNodeColumnDesc) { + preCols.add(((ExprNodeColumnDesc) exprNode).getColumn()); + } else if (exprNode instanceof ExprNodeGenericFuncDesc) { + ExprNodeGenericFuncDesc udfNode = ((ExprNodeGenericFuncDesc) exprNode); + GenericUDF udf = udfNode.getGenericUDF(); + if (!FunctionRegistry.isDeterministic(udf) + || udfNode.getCols().size() != 1) { + valid = false; + break; + } + preCols.add(udfNode.getCols().get(0)); + } else { + valid =false; + break; + } + valid = valid && backTraceSourceTableColumns(retMap, preCols, amj.getParentOperators().get(tag)); + } + return valid; + } + + Map colExprMap = current.getColumnExprMap(); + if (current.getParentOperators() != null + && current.getParentOperators().size() > 0) { + current = current.getParentOperators().get(0); + } else { + retMap.put(current, currCols); + current = null; + continue; + } + // already set current to the parent operator. + if(colExprMap == null) { + continue; + } + + for (String col : currCols) { + ExprNodeDesc exprNode = colExprMap.get(col); + if (exprNode instanceof ExprNodeColumnDesc) { + preCols.add(((ExprNodeColumnDesc) exprNode).getColumn()); + } else if (exprNode instanceof ExprNodeGenericFuncDesc) { + ExprNodeGenericFuncDesc udfNode = ((ExprNodeGenericFuncDesc) exprNode); + //here is a bug, we trust all udfs here. 
+ preCols.add(udfNode.getCols().get(0)); + } else { + valid = false; + break; + } + } + List swap = currCols; + currCols = preCols; + preCols = swap; + } + return valid; + } + + public static String getJoinAliases(QBJoinTree joinCxt, List joinAliases) { + String[] srcs = joinCxt.getBaseSrc(); + String[] left = joinCxt.getLeftAliases(); + List mapAlias = joinCxt.getMapAliases(); + String baseBigAlias = null; + for(String s : left) { + if(s != null && !joinAliases.contains(s)) { + joinAliases.add(s); + if(!mapAlias.contains(s)) { + baseBigAlias = s; + } + } + } + for(String s : srcs) { + if(s != null && !joinAliases.contains(s)) { + joinAliases.add(s); + if(!mapAlias.contains(s)) { + baseBigAlias = s; + } + } + } + return baseBigAlias; + } + + public static TableScanOperator backTraceTableScanOperator(Operator start, ParseContext pGraphContext) { + if(start == null) { + return null; + } + + if(start instanceof TableScanOperator) { + return (TableScanOperator) start; + } + if ((start instanceof JoinOperator) + || (start instanceof UnionOperator) + || (start instanceof ReduceSinkOperator) + || (start instanceof LateralViewJoinOperator) + || (start instanceof GroupByOperator)) { + return null; + } + if (start instanceof ScriptOperator + && !HiveConf.getBoolVar(pGraphContext.getConf(), + HiveConf.ConfVars.HIVEOPTSORTMERGEBUCKETMAPJOINTRANSFORMTRUST)) { + return null; + } + + if(start instanceof AbstractMapJoinOperator) { + AbstractMapJoinOperator mj = (AbstractMapJoinOperator) start; + int posBigTbl = ((MapJoinDesc) mj.getConf()).getPosBigTable(); + return backTraceTableScanOperator((Operator)mj.getParentOperators().get(posBigTbl), pGraphContext); + } + + if (start.getParentOperators() == null + || start.getParentOperators().size() > 1) { + return null; + } else { + return backTraceTableScanOperator((Operator)start.getParentOperators().get(0), pGraphContext); + } + } + + public static String reverseLookupTableScanMap(ParseContext pGraphContext, + TableScanOperator topScabnOp) { + Map> queryTopOps = pGraphContext.getTopOps(); + for(Map.Entry> entry : queryTopOps.entrySet()) { + if(entry.getValue() == topScabnOp || entry.getValue().equals(topScabnOp)) { + return entry.getKey(); + } + } + return null; + } + +} Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java (revision 919258) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java (working copy) @@ -23,6 +23,7 @@ import java.util.HashMap; import java.util.Iterator; import java.util.LinkedHashMap; +import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Set; @@ -31,10 +32,8 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.api.Order; import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator; import org.apache.hadoop.hive.ql.exec.ColumnInfo; -import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.exec.JoinOperator; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; @@ -42,7 +41,6 @@ import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; import org.apache.hadoop.hive.ql.exec.RowSchema; import org.apache.hadoop.hive.ql.exec.SelectOperator; -import org.apache.hadoop.hive.ql.exec.TableScanOperator; import 
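backTraceSourceTableColumns essentially follows each operator's columnExprMap upward until it reaches a table scan. The same idea reduced to plain maps (one rename map per operator, child-to-parent order); a sketch only, with simple string maps standing in for ExprNodeDesc expressions:

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class ColumnBackTraceDemo {
  // Walk a chain of per-operator column rename maps and return the
  // source-table column, or null if a step is missing -- which in the real
  // optimizer means "give up on this map join".
  static String backTrace(String column, List<Map<String, String>> colExprMaps) {
    String current = column;
    for (Map<String, String> colExprMap : colExprMaps) {
      String parentCol = colExprMap.get(current);
      if (parentCol == null) {
        return null;
      }
      current = parentCol;
    }
    return current;
  }

  public static void main(String[] args) {
    // "select key as k1 ...": the select operator maps k1 -> key.
    Map<String, String> selectOp = new HashMap<String, String>();
    selectOp.put("k1", "key");
    // The table scan maps key to itself.
    Map<String, String> tableScanOp = new HashMap<String, String>();
    tableScanOp.put("key", "key");

    System.out.println(backTrace("k1", Arrays.asList(selectOp, tableScanOp))); // key
  }
}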
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java	(revision 919258)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java	(working copy)
@@ -23,6 +23,7 @@
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.LinkedHashMap;
+import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -31,10 +32,8 @@
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.metastore.api.Order;
 import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator;
 import org.apache.hadoop.hive.ql.exec.ColumnInfo;
-import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
 import org.apache.hadoop.hive.ql.exec.JoinOperator;
 import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
@@ -42,7 +41,6 @@
 import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
 import org.apache.hadoop.hive.ql.exec.RowSchema;
 import org.apache.hadoop.hive.ql.exec.SelectOperator;
-import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
 import org.apache.hadoop.hive.ql.lib.Dispatcher;
 import org.apache.hadoop.hive.ql.lib.GraphWalker;
@@ -51,29 +49,21 @@
 import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
 import org.apache.hadoop.hive.ql.lib.Rule;
 import org.apache.hadoop.hive.ql.lib.RuleRegExp;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.metadata.Partition;
-import org.apache.hadoop.hive.ql.metadata.Table;
-import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
-import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
 import org.apache.hadoop.hive.ql.parse.ErrorMsg;
 import org.apache.hadoop.hive.ql.parse.GenMapRedWalker;
 import org.apache.hadoop.hive.ql.parse.OpParseContext;
 import org.apache.hadoop.hive.ql.parse.ParseContext;
-import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
 import org.apache.hadoop.hive.ql.parse.QBJoinTree;
 import org.apache.hadoop.hive.ql.parse.RowResolver;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
 import org.apache.hadoop.hive.ql.plan.JoinDesc;
 import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
 import org.apache.hadoop.hive.ql.plan.PlanUtils;
 import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
 import org.apache.hadoop.hive.ql.plan.SelectDesc;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;

 /**
  * Implementation of one of the rule-based map join optimization. User passes
@@ -83,9 +73,9 @@
  * implemented, this transformation can also be done based on costs.
  */
 public class MapJoinProcessor implements Transform {
-
+
   private static final Log LOG = LogFactory.getLog(MapJoinProcessor.class.getName());
-
+
   private ParseContext pGraphContext;

   /**
@@ -102,10 +92,10 @@
     pGraphContext.getOpParseCtx().put(op, ctx);
     return op;
   }
-
+
   /**
    * convert a regular join to a a map-side join.
-   *
+   *
    * @param op
    *          join operator
    * @param qbJoin
@@ -255,7 +245,7 @@
         keyTableDesc, valueExprMap, valueTableDescs, outputColumnNames,
         mapJoinPos, joinCondns), new RowSchema(outputRS.getColumnInfos()),
         newPar), outputRS);
-
+
     mapJoinOp.getConf().setReversedExprs(op.getConf().getReversedExprs());
     mapJoinOp.setColumnExprMap(colExprMap);
@@ -271,6 +261,11 @@
     op.setChildOperators(null);
     op.setParentOperators(null);

+    HashMap<String, Operator<? extends Serializable>> map = pctx.getJoinOpToAliasToOpMapping().remove(op);
+    if (map != null) {
+      pctx.getJoinOpToAliasToOpMapping().put(mapJoinOp, map);
+    }
+
     // create a dummy select to select all columns
     genSelectPlan(pctx, mapJoinOp);
     return mapJoinOp;
@@ -340,7 +335,7 @@

   /**
    * Is it a map-side join.
-   *
+   *
    * @param op
    *          join operator
    * @param qbJoin
@@ -386,7 +381,7 @@
   /**
    * Transform the query tree. For each join, check if it is a map-side join
    * (user specified). If yes, convert it to a map-side join.
-   *
+   *
    * @param pactx
    *          current parse context
    */
@@ -438,15 +433,13 @@
         getMapJoinFS());
     opRules.put(new RuleRegExp(new String("R2"), "MAPJOIN%.*RS%"),
         getMapJoinDefault());
-    opRules.put(new RuleRegExp(new String("R3"), "MAPJOIN%.*MAPJOIN%"),
-        getMapJoinDefault());
     opRules.put(new RuleRegExp(new String("R4"), "MAPJOIN%.*UNION%"),
         getMapJoinDefault());

     // The dispatcher fires the processor corresponding to the closest matching
     // rule and passes the context along
     Dispatcher disp = new DefaultRuleDispatcher(getDefault(), opRules,
-        new MapJoinWalkerCtx(listMapJoinOpsNoRed));
+        new MapJoinWalkerCtx(listMapJoinOpsNoRed, pGraphContext));

     GraphWalker ogw = new GenMapRedWalker(disp);
     ArrayList<Node> topNodes = new ArrayList<Node>();
@@ -472,11 +465,132 @@
       MapJoinWalkerCtx ctx = (MapJoinWalkerCtx) procCtx;
       MapJoinOperator mapJoin = (MapJoinOperator) nd;

+      MapJoinOperator parentMapJoin = (MapJoinOperator) ctx.getCurrMapJoinOp();
+      if (parentMapJoin != null
+          && (ctx.getListRejectedMapJoins() != null && !ctx
+              .getListRejectedMapJoins().contains(parentMapJoin))) {
+        // for rule: MAPJOIN%.*MAPJOIN%
+        // we have a parent map join. If the parent map join comes from a local
+        // work branch, put the parent map join in the rejected list.
+        int pos = findGrandparentBranch(mapJoin, parentMapJoin);
+        if (pos >= 0) {
+          if (pos != mapJoin.getConf().getPosBigTable()) {
+            addRejectMapJoinToCtx(ctx, parentMapJoin);
+          } else {
+            if (sameJoinKeys(ctx.getpGraphContext(), mapJoin, parentMapJoin, pos)) {
+              addNoReducerMapJoinToCtx(ctx, parentMapJoin);
+            } else {
+              addRejectMapJoinToCtx(ctx, parentMapJoin);
+            }
+          }
+        }
+      }
       ctx.setCurrMapJoinOp(mapJoin);
       return null;
     }
+
+    /**
+     * Given a map join operator and its parent map join operator, find out
+     * whether this map join operator is using the same join keys as its
+     * parent map join operator. For example, if the parent map join
+     * operator joins table t1 with table t2 on t1.key=t2.key and
+     * t1.value=t2.value, this map join has the same join keys as its parent
+     * if it is also using t1.key, t1.value (or t2.key, t2.value) as its join
+     * keys (note: t1.key, t2.value will not work).
+     *
+     * @param parseContext
+     * @param mapJoin
+     * @param parentMapJoin
+     * @param pos
+     * @return
+     */
+    private boolean sameJoinKeys(ParseContext parseContext, MapJoinOperator mapJoin,
+        MapJoinOperator parentMapJoin, int pos) {
+
+      MapJoinOptimizeContext cxt = MapJoinOptimizeContext.getMapJoinOptimizeContext(parseContext, mapJoin);
+      if (cxt == null) {
+        return false;
+      }
+      Map<Byte, Map<TableScanOperator, List<String>>> currMapJoinCxt = cxt.getAliasToScanOpToJoinColsMapping().get(mapJoin);
+
+      cxt = MapJoinOptimizeContext.getMapJoinOptimizeContext(parseContext, parentMapJoin);
+      if (cxt == null) {
+        return false;
+      }
+      Map<Byte, Map<TableScanOperator, List<String>>> parentMapJoinCxt = cxt.getAliasToScanOpToJoinColsMapping().get(parentMapJoin);
+
+      if (currMapJoinCxt == null || parentMapJoinCxt == null) {
+        return false;
+      }
+
+      Map<TableScanOperator, List<String>> parentMJBranchCopyMap = new HashMap<TableScanOperator, List<String>>();
+      parentMJBranchCopyMap.putAll(currMapJoinCxt.get((byte) pos));
+
+      // only merge when the current map join operator's join keys from the
+      // parent branch come from the same table as the parent map join
+      // operator's.
+      for (Map<TableScanOperator, List<String>> entry : parentMapJoinCxt.values()) {
+        if (entry.equals(parentMJBranchCopyMap)) {
+          return true;
+        }
+      }
+      return false;
+    }
   }

+  private static void addNoReducerMapJoinToCtx(MapJoinWalkerCtx ctx,
+      AbstractMapJoinOperator<? extends MapJoinDesc> mapJoin) {
+    List<AbstractMapJoinOperator<? extends MapJoinDesc>> listMapJoinsNoRed = ctx.getListMapJoinsNoRed();
+    if (listMapJoinsNoRed == null) {
+      listMapJoinsNoRed = new ArrayList<AbstractMapJoinOperator<? extends MapJoinDesc>>();
+    }
+    if (!listMapJoinsNoRed.contains(mapJoin)) {
+      listMapJoinsNoRed.add(mapJoin);
+    }
+    ctx.setListMapJoins(listMapJoinsNoRed);
+  }
+
+  private static void addRejectMapJoinToCtx(MapJoinWalkerCtx ctx,
+      AbstractMapJoinOperator<? extends MapJoinDesc> mapjoin) {
+    List<AbstractMapJoinOperator<? extends MapJoinDesc>> listRejectedMapJoins = ctx.getListRejectedMapJoins();
+    if (listRejectedMapJoins == null) {
+      listRejectedMapJoins = new ArrayList<AbstractMapJoinOperator<? extends MapJoinDesc>>();
+    }
+    if (!listRejectedMapJoins.contains(mapjoin)) {
+      listRejectedMapJoins.add(mapjoin);
+    }
+
+    if (ctx.getListMapJoinsNoRed() != null
+        && ctx.getListMapJoinsNoRed().contains(mapjoin)) {
+      ctx.getListMapJoinsNoRed().remove(mapjoin);
+    }
+
+    ctx.setListRejectedMapJoins(listRejectedMapJoins);
+  }
+
+  private static int findGrandparentBranch(Operator<? extends Serializable> currOp, Operator<? extends Serializable> grandParent) {
+    int pos = -1;
+    for (int i = 0; i < currOp.getParentOperators().size(); i++) {
+      List<Operator<? extends Serializable>> parentOpList = new LinkedList<Operator<? extends Serializable>>();
+      parentOpList.add(currOp.getParentOperators().get(i));
+      boolean found = false;
+      while (!parentOpList.isEmpty()) {
+        Operator<? extends Serializable> p = parentOpList.remove(0);
+        if (p == grandParent) {
+          found = true;
+          break;
+        } else if (p.getParentOperators() != null) {
+          parentOpList.addAll(p.getParentOperators());
+        }
+      }
+      if (found) {
+        pos = i;
+        break;
+      }
+    }
+    return pos;
+  }
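findGrandparentBranch does a breadth-first walk up each parent branch of the current map join to find which branch reaches the earlier map join. The same search over a bare parent-pointer graph, with a minimal Node class standing in for Operator (an illustration, not Hive code):

import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;

public class GrandparentBranchDemo {
  static class Node {
    final List<Node> parents = new ArrayList<Node>();
    Node(Node... ps) { for (Node p : ps) { parents.add(p); } }
  }

  // Returns the index of the parent branch of curr that reaches target, or -1.
  static int findGrandparentBranch(Node curr, Node target) {
    for (int i = 0; i < curr.parents.size(); i++) {
      List<Node> queue = new LinkedList<Node>();
      queue.add(curr.parents.get(i));
      while (!queue.isEmpty()) {
        Node p = queue.remove(0);
        if (p == target) {
          return i;
        }
        queue.addAll(p.parents);
      }
    }
    return -1;
  }

  public static void main(String[] args) {
    Node parentJoin = new Node();
    Node branch0 = new Node(parentJoin); // branch 0 leads to the parent join
    Node branch1 = new Node();
    Node currJoin = new Node(branch0, branch1);
    System.out.println(findGrandparentBranch(currJoin, parentJoin)); // 0
  }
}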
nodeOutputs) throws SemanticException { MapJoinWalkerCtx ctx = (MapJoinWalkerCtx) procCtx; AbstractMapJoinOperator mapJoin = ctx.getCurrMapJoinOp(); - List> listRejectedMapJoins = ctx - .getListRejectedMapJoins(); - if (listRejectedMapJoins == null) { - listRejectedMapJoins = new ArrayList>(); - } - listRejectedMapJoins.add(mapJoin); - ctx.setListRejectedMapJoins(listRejectedMapJoins); + addRejectMapJoinToCtx(ctx, mapJoin); return null; } } @@ -573,17 +675,21 @@ * */ public static class MapJoinWalkerCtx implements NodeProcessorCtx { + + private ParseContext pGraphContext; private List> listMapJoinsNoRed; private List> listRejectedMapJoins; private AbstractMapJoinOperator currMapJoinOp; /** * @param listMapJoinsNoRed + * @param pGraphContext2 */ - public MapJoinWalkerCtx(List> listMapJoinsNoRed) { + public MapJoinWalkerCtx(List> listMapJoinsNoRed, ParseContext pGraphContext) { this.listMapJoinsNoRed = listMapJoinsNoRed; currMapJoinOp = null; listRejectedMapJoins = new ArrayList>(); + this.pGraphContext = pGraphContext; } /** @@ -631,5 +737,14 @@ List> listRejectedMapJoins) { this.listRejectedMapJoins = listRejectedMapJoins; } + + public ParseContext getpGraphContext() { + return pGraphContext; + } + + public void setpGraphContext(ParseContext pGraphContext) { + this.pGraphContext = pGraphContext; + } + } } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedMergeBucketMapJoinOptimizer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedMergeBucketMapJoinOptimizer.java (revision 919258) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedMergeBucketMapJoinOptimizer.java (working copy) @@ -29,7 +29,6 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.metastore.api.Order; -import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator; @@ -52,12 +51,7 @@ import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; import org.apache.hadoop.hive.ql.parse.QBJoinTree; import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; -import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.ql.plan.SMBJoinDesc; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; //try to replace a bucket map join with a sorted merge map join public class SortedMergeBucketMapJoinOptimizer implements Transform { @@ -129,54 +123,73 @@ boolean tableSorted = true; QBJoinTree joinCxt = this.pGraphContext.getMapJoinContext() .get(mapJoinOp); - if (joinCxt == null) + if (joinCxt == null) { return null; + } String[] srcs = joinCxt.getBaseSrc(); int pos = 0; for (String src : srcs) { tableSorted = tableSorted && isTableSorted(this.pGraphContext, mapJoinOp, joinCxt, src, pos); + if (!tableSorted) { + break; + } pos++; } if (!tableSorted) { //this is a mapjoin but not suit for a sort merge bucket map join. 
check outer joins
-        MapJoinProcessor.checkMapJoin(((MapJoinOperator) nd).getConf().getPosBigTable(),
+        MapJoinProcessor.checkMapJoin(((MapJoinOperator) nd).getConf().getPosBigTable(),
             ((MapJoinOperator) nd).getConf().getConds());
         return null;
       }
       // convert a bucket map join operator to a sorted merge bucket map join
       // operator
-      convertToSMBJoin(mapJoinOp, srcs);
+      convertToSMBJoin(mapJoinOp);
       return null;
     }

-    private SMBMapJoinOperator convertToSMBJoin(MapJoinOperator mapJoinOp,
-        String[] srcs) {
+    private SMBMapJoinOperator convertToSMBJoin(MapJoinOperator mapJoinOp) {
       SMBMapJoinOperator smbJop = new SMBMapJoinOperator(mapJoinOp);
       SMBJoinDesc smbJoinDesc = new SMBJoinDesc(mapJoinOp.getConf());
       smbJop.setConf(smbJoinDesc);
       HashMap tagToAlias = new HashMap();
-      for (int i = 0; i < srcs.length; i++) {
-        tagToAlias.put((byte) i, srcs[i]);
+
+      Map, List>> bucketMapJoinContext = MapJoinOptimizeContext
+          .getMapJoinOptimizeContext(this.pGraphContext, mapJoinOp).getAliasToScanOpToJoinColsMapping().get(mapJoinOp);
+
+      for(byte pos: mapJoinOp.getConf().getTagOrder()) {
+        if(pos == mapJoinOp.getConf().getPosBigTable()) {
+          continue;
+        }
+        Map, List> tblScanMap = bucketMapJoinContext.get(pos);
+        if (tblScanMap.size() != 1) {
+          return null;
+        }
+        TableScanOperator op = null;
+        for (Map.Entry, List> entry: tblScanMap.entrySet()) {
+          op = (TableScanOperator) entry.getKey();
+        }
+        tagToAlias.put(pos, MapJoinOptimizeContext.reverseLookupTableScanMap(this.pGraphContext, op));
       }
+
       smbJoinDesc.setTagToAlias(tagToAlias);
-
       int indexInListMapJoinNoReducer = this.pGraphContext.getListMapJoinOpsNoReducer().indexOf(mapJoinOp);
       if(indexInListMapJoinNoReducer >= 0 ) {
         this.pGraphContext.getListMapJoinOpsNoReducer().remove(indexInListMapJoinNoReducer);
         this.pGraphContext.getListMapJoinOpsNoReducer().add(indexInListMapJoinNoReducer, smbJop);
       }

-      List parentOperators = mapJoinOp.getParentOperators();
+      List> parentOperators = mapJoinOp.getParentOperators();
       for (int i = 0; i < parentOperators.size(); i++) {
-        Operator par = parentOperators.get(i);
+        Operator par = parentOperators.get(i);
         int index = par.getChildOperators().indexOf(mapJoinOp);
         par.getChildOperators().remove(index);
         par.getChildOperators().add(index, smbJop);
       }
-      List childOps = mapJoinOp.getChildOperators();
+      List> childOps = mapJoinOp.getChildOperators();
       for (int i = 0; i < childOps.size(); i++) {
-        Operator child = childOps.get(i);
+        Operator child = childOps.get(i);
         int index = child.getParentOperators().indexOf(mapJoinOp);
         child.getParentOperators().remove(index);
         child.getParentOperators().add(index, smbJop);
@@ -186,61 +199,49 @@

    private boolean isTableSorted(ParseContext pctx, MapJoinOperator op,
        QBJoinTree joinTree, String alias, int pos) throws SemanticException {
-      Map> topOps = this.pGraphContext
-          .getTopOps();
       Map topToTable = this.pGraphContext
           .getTopToTable();
-      TableScanOperator tso = (TableScanOperator) topOps.get(alias);
-      if (tso == null)
-        return false;
-      List keys = op.getConf().getKeys().get((byte) pos);
-      // get all join columns from join keys stored in MapJoinDesc
-      List joinCols = new ArrayList();
-      List joinKeys = new ArrayList();
-      joinKeys.addAll(keys);
-      while (joinKeys.size() > 0) {
-        ExprNodeDesc node = joinKeys.remove(0);
-        if (node instanceof ExprNodeColumnDesc) {
-          joinCols.addAll(node.getCols());
-        } else if (node instanceof ExprNodeGenericFuncDesc) {
-          ExprNodeGenericFuncDesc udfNode = ((ExprNodeGenericFuncDesc) node);
-          GenericUDF udf = udfNode.getGenericUDF();
-          if (!FunctionRegistry.isDeterministic(udf)) {
-            return false;
-          }
-          joinKeys.addAll(0, udfNode.getChildExprs());
-        }
+      Map, List>> bucketMapJoinContext = MapJoinOptimizeContext
+          .getMapJoinOptimizeContext(this.pGraphContext, op)
+          .getAliasToScanOpToJoinColsMapping().get(op);
+      if(bucketMapJoinContext == null) {
+        return false;
+      }
+      Map, List> tableScanJoinColsMap = bucketMapJoinContext.get((byte)pos);
+      if(tableScanJoinColsMap == null ) {
+        return false;
       }
-      Table tbl = topToTable.get(tso);
-      if (tbl.isPartitioned()) {
-        PrunedPartitionList prunedParts = null;
-        try {
-          prunedParts = PartitionPruner.prune(tbl, pGraphContext
-              .getOpToPartPruner().get(tso), pGraphContext.getConf(), alias,
-              pGraphContext.getPrunedPartitions());
-        } catch (HiveException e) {
-          LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
-          throw new SemanticException(e.getMessage(), e);
+      boolean ret = true;
+      for (Map.Entry, List> map: tableScanJoinColsMap.entrySet()) {
+        if(!ret) {
+          return false;
         }
-        boolean ret = true;
-        for (Partition p : prunedParts.getConfirmedPartns()) {
-          ret = ret && checkSortColsAndJoinCols(p.getSortCols(), joinCols);
-          if (!ret) {
-            return false;
+        TableScanOperator tso = (TableScanOperator) map.getKey();
+        Table tbl = topToTable.get(tso);
+        List joinCols = map.getValue();
+        if (tbl.isPartitioned()) {
+          PrunedPartitionList prunedParts = null;
+          try {
+            prunedParts = PartitionPruner.prune(tbl, pGraphContext
                .getOpToPartPruner().get(tso), pGraphContext.getConf(), alias,
                pGraphContext.getPrunedPartitions());
+          } catch (HiveException e) {
+            LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
+            throw new SemanticException(e.getMessage(), e);
          }
-        }
-        for (Partition p : prunedParts.getUnknownPartns()) {
-          ret = ret && checkSortColsAndJoinCols(p.getSortCols(), joinCols);
-          if (!ret) {
-            return false;
+          for (Partition p : prunedParts.getConfirmedPartns()) {
+            ret = ret && checkSortColsAndJoinCols(p.getSortCols(), joinCols);
          }
+          for (Partition p : prunedParts.getUnknownPartns()) {
+            ret = ret && checkSortColsAndJoinCols(p.getSortCols(), joinCols);
+          }
+        } else {
+          ret = ret && checkSortColsAndJoinCols(tbl.getSortCols(), joinCols);
         }
-      } else {
-        return checkSortColsAndJoinCols(tbl.getSortCols(), joinCols);
       }
-      return true;
+      return ret;
     }

    private boolean checkSortColsAndJoinCols(List sortCols,
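The rewritten isTableSorted above reduces, per table (or per surviving partition after pruning), to checkSortColsAndJoinCols, whose body lies outside this hunk. A plausible reading of that check, sketched under the assumption that the join columns must form a leading prefix of the table's sort columns, in matching order, so that a sorted merge can stream both sides. The Order stand-in and every name below are illustrative, not the metastore API.

import java.util.Arrays;
import java.util.List;

public class SortCheckSketch {
  // Illustrative stand-in for a (column, direction) sort specification.
  static class Order {
    final String col;
    final int dir; // 1 = ascending
    Order(String col, int dir) { this.col = col; this.dir = dir; }
  }

  // Assumed semantics of checkSortColsAndJoinCols (the real body is not in
  // this hunk): the i-th join column must equal the i-th sort column, i.e.
  // the join columns are a leading prefix of the sort order.
  static boolean checkSortColsAndJoinCols(List<Order> sortCols, List<String> joinCols) {
    if (sortCols == null || sortCols.size() < joinCols.size()) {
      return false;
    }
    for (int i = 0; i < joinCols.size(); i++) {
      if (!sortCols.get(i).col.equals(joinCols.get(i))) {
        return false;
      }
    }
    return true;
  }

  public static void main(String[] args) {
    List<Order> sorted = Arrays.asList(new Order("key", 1), new Order("value", 1));
    System.out.println(checkSortColsAndJoinCols(sorted, Arrays.asList("key")));   // true
    System.out.println(checkSortColsAndJoinCols(sorted, Arrays.asList("value"))); // false: not a prefix
  }
}

For a partitioned table the surrounding loop applies this check to every confirmed and unknown partition, so a single differently sorted partition vetoes the conversion.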
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java (revision 919258)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java (working copy)
@@ -28,12 +28,14 @@
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.Context;
 import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.CommonJoinOperator;
 import org.apache.hadoop.hive.ql.exec.GroupByOperator;
 import org.apache.hadoop.hive.ql.exec.JoinOperator;
 import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.optimizer.MapJoinOptimizeContext;
 import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.LoadFileDesc;
@@ -48,7 +50,7 @@
  * populated. Note that since the parse context contains the operator tree, it
  * can be easily retrieved by the next optimization step or finally for task
  * generation after the plan has been completely optimized.
- *
+ *
  **/
 public class ParseContext {
@@ -75,6 +77,8 @@
   private Map> groupOpToInputTables;
   private Map prunedPartitions;

+  private MapJoinOptimizeContext bucketOptimizerContext;
+
   // is set to true if the expression only contains partitioning columns and not
   // any other column reference.
   // This is used to optimize select * from table where ... scenario, when the
@@ -84,6 +88,8 @@
   // a map-reduce job
   private boolean hasNonPartCols;

+  private Map, HashMap> joinOpToAliasToOpMapping;
+
   public ParseContext() {
   }

@@ -118,6 +124,7 @@
   * list of map join operators with no reducer
   * @param opToSamplePruner
   * operator to sample pruner map
+  * @param joinOpToAliasToOpMapping
   */
   public ParseContext(
       HiveConf conf,
@@ -134,7 +141,8 @@
       UnionProcContext uCtx, List> listMapJoinOpsNoReducer,
       Map> groupOpToInputTables,
       Map prunedPartitions,
-      HashMap opToSamplePruner) {
+      HashMap opToSamplePruner,
+      Map, HashMap> joinOpToAliasToOpMapping) {
     this.conf = conf;
     this.qb = qb;
     this.ast = ast;
@@ -156,6 +164,7 @@
     this.groupOpToInputTables = groupOpToInputTables;
     this.prunedPartitions = prunedPartitions;
     this.opToSamplePruner = opToSamplePruner;
+    this.joinOpToAliasToOpMapping = joinOpToAliasToOpMapping;
   }

  /**
@@ -383,7 +392,7 @@
  /**
   * Sets the hasNonPartCols flag.
-  *
+  *
   * @param val
   */
   public void setHasNonPartCols(boolean val) {
@@ -450,4 +459,21 @@
   public void setMapJoinContext(Map mapJoinContext) {
     this.mapJoinContext = mapJoinContext;
   }
+
+  public Map, HashMap> getJoinOpToAliasToOpMapping() {
+    return joinOpToAliasToOpMapping;
+  }
+
+  public void setJoinOpToAliasToOpMapping(
+      Map, HashMap> joinOpToAliasToOpMapping) {
+    this.joinOpToAliasToOpMapping = joinOpToAliasToOpMapping;
+  }
+
+  public MapJoinOptimizeContext getBucketOptimizerContext() {
+    return bucketOptimizerContext;
+  }
+
+  public void setBucketOptimizerContext(MapJoinOptimizeContext bucketOptimizerContext) {
+    this.bucketOptimizerContext = bucketOptimizerContext;
+  }
 }
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (revision 920525)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (working copy)
@@ -47,6 +47,7 @@
 import org.apache.hadoop.hive.metastore.api.Order;
 import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator;
 import org.apache.hadoop.hive.ql.exec.ColumnInfo;
+import org.apache.hadoop.hive.ql.exec.CommonJoinOperator;
 import org.apache.hadoop.hive.ql.exec.ConditionalTask;
 import org.apache.hadoop.hive.ql.exec.ExecDriver;
 import org.apache.hadoop.hive.ql.exec.FetchTask;
@@ -54,7 +55,6 @@
 import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
 import org.apache.hadoop.hive.ql.exec.GroupByOperator;
 import org.apache.hadoop.hive.ql.exec.JoinOperator;
-import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
 import org.apache.hadoop.hive.ql.exec.MapRedTask;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.OperatorFactory;
@@ -183,6 +183,7 @@
   private CreateViewDesc createVwDesc;
   private ASTNode viewSelect;
   private final UnparseTranslator unparseTranslator;
+  private Map, HashMap> joinOpToAliasToOpMapping;

   private static class Phase1Ctx {
     String dest;
@@ -208,6 +209,7 @@
     groupOpToInputTables = new HashMap>();
     prunedPartitions = new HashMap();
     unparseTranslator = new UnparseTranslator();
+    joinOpToAliasToOpMapping = new HashMap, HashMap>();
   }

   @Override
@@ -245,13 +247,14 @@
     qb = pctx.getQB();
     groupOpToInputTables = pctx.getGroupOpToInputTables();
     prunedPartitions = pctx.getPrunedPartitions();
+    joinOpToAliasToOpMapping = pctx.getJoinOpToAliasToOpMapping();
   }

   public ParseContext getParseContext() {
     return new ParseContext(conf, qb, ast, opToPartPruner, topOps, topSelOps,
         opParseCtx, joinContext, topToTable, loadTableWork, loadFileWork, ctx,
         idToTableNameMap, destTableId, uCtx, listMapJoinOpsNoReducer,
-        groupOpToInputTables, prunedPartitions, opToSamplePruner);
+        groupOpToInputTables, prunedPartitions, opToSamplePruner, joinOpToAliasToOpMapping);
   }

   @SuppressWarnings("nls")
@@ -3627,7 +3630,7 @@
         column = null;
       } else {
         column = TypeCheckProcFactory.DefaultExprProcessor
-            .getFuncExprNodeDesc(tableFieldTypeInfo.getTypeName(),
+            .getFuncExprNodeDesc(tableFieldTypeInfo.getTypeName(),
                 column);
       }
       if (column == null) {
@@ -3975,11 +3978,11 @@
   }

   private Operator genJoinOperator(QB qb, QBJoinTree joinTree,
-      HashMap map) throws SemanticException {
+      HashMap map, HashMap aliasToTopScanMap) throws SemanticException {
     QBJoinTree leftChild = joinTree.getJoinSrc();
     Operator joinSrcOp = null;
     if (leftChild != null) {
-      Operator joinOp = genJoinOperator(qb, leftChild, map);
+      Operator joinOp = genJoinOperator(qb, leftChild, map, aliasToTopScanMap);
       ArrayList filter = joinTree.getFilters().get(0);
       for (ASTNode cond : filter) {
         joinOp = genFilterPlan(qb, cond, joinOp);
       }
@@ -4031,6 +4034,7 @@
     JoinOperator joinOp = (JoinOperator) genJoinOperatorChildren(joinTree,
         joinSrcOp, srcOps, omitOpts);
     joinContext.put(joinOp, joinTree);
+    joinOpToAliasToOpMapping.put(joinOp, aliasToTopScanMap);
     return joinOp;
   }

@@ -4171,10 +4175,10 @@
     }
   }

-  private Operator genJoinPlan(QB qb, HashMap map)
+  private Operator genJoinPlan(QB qb, HashMap map, HashMap aliasToTopScanOp)
       throws SemanticException {
     QBJoinTree joinTree = qb.getQbJoinTree();
-    Operator joinOp = genJoinOperator(qb, joinTree, map);
+    Operator joinOp = genJoinOperator(qb, joinTree, map, aliasToTopScanOp);
     return joinOp;
   }

@@ -5380,10 +5384,12 @@
         mergeJoinTree(qb);
       }

+      HashMap aliasTopOpMappingCopy = new HashMap();
+      aliasTopOpMappingCopy.putAll(aliasToOpInfo);
       // if any filters are present in the join tree, push them on top of the
       // table
       pushJoinFilters(qb, qb.getQbJoinTree(), aliasToOpInfo);
-      srcOpInfo = genJoinPlan(qb, aliasToOpInfo);
+      srcOpInfo = genJoinPlan(qb, aliasToOpInfo, aliasTopOpMappingCopy);
     } else {
       // Now if there are more than 1 sources then we have a join case
       // later we can extend this to the union all case as well
@@ -5935,7 +5941,7 @@
         topOps, topSelOps, opParseCtx, joinContext, topToTable, loadTableWork,
         loadFileWork, ctx, idToTableNameMap, destTableId, uCtx,
         listMapJoinOpsNoReducer, groupOpToInputTables, prunedPartitions,
-        opToSamplePruner);
+        opToSamplePruner, joinOpToAliasToOpMapping);
     Optimizer optm = new Optimizer();
     optm.setPctx(pCtx);
Index: ql/src/test/queries/clientpositive/bucketmapjoin_subquery.q
===================================================================
--- ql/src/test/queries/clientpositive/bucketmapjoin_subquery.q (revision 0)
+++ ql/src/test/queries/clientpositive/bucketmapjoin_subquery.q (revision 0)
@@ -0,0 +1,105 @@
+drop table srcbucket_mapjoin;
+drop table srcbucket_mapjoin_part;
+drop table srcbucket_mapjoin_part_2;
+
+CREATE TABLE srcbucket_mapjoin(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS
TEXTFILE;
+load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin;
+load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin;
+
+CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
+load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08');
+load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08');
+load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08');
+load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08');
+load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09');
+load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09');
+load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09');
+load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09');
+
+CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08');
+load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08');
+load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09');
+load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09');
+
+set hive.optimize.bucketmapjoin = true;
+
+explain extended
+select count(1) from (
+select /*+mapjoin(a)*/ a.key as key, a.value as v1, b.value as v2
+from srcbucket_mapjoin a join srcbucket_mapjoin_part b
+on a.key=b.key) subq;
+
+select count(1) from (
+select /*+mapjoin(a)*/ a.key as key, a.value as v1, b.value as v2
+from srcbucket_mapjoin a join srcbucket_mapjoin_part b
+on a.key=b.key) subq;
+
+explain extended
+select * from (
+select /*+mapjoin(a)*/ a.key as key, a.value as v1, b.value as v2
+from srcbucket_mapjoin a join srcbucket_mapjoin_part b
+on a.key=b.key) subq where v1 != '100';
+
+select * from (
+select /*+mapjoin(a)*/ a.key as key, a.value as v1, b.value as v2
+from srcbucket_mapjoin a join srcbucket_mapjoin_part b
+on a.key=b.key) subq where v1 != '100';
+
+explain extended
+select /*+mapjoin(a)*/ a.key as key, a.value as v1, b.value as v2
+from
+(select * from srcbucket_mapjoin where value != '100') a
+join srcbucket_mapjoin_part b
+on a.key=b.key;
+
+select /*+mapjoin(a)*/ a.key as key, a.value as v1, b.value as v2
+from
+(select * from srcbucket_mapjoin where value != '100') a
+join srcbucket_mapjoin_part b
+on a.key=b.key;
+
+explain extended
+select count(1) from (
+select /*+mapjoin(a)*/ a.key as key, a.value as v1, b.value as v2
+from
+(select * from srcbucket_mapjoin where key<100) a
+join srcbucket_mapjoin_part b
+on a.key=b.key) subq;
+
+select count(1) from
+(
+select /*+mapjoin(b)*/ a.key as key, a.value as v1, b.value as v2
+from
+(select * from srcbucket_mapjoin where key<100) a
+join srcbucket_mapjoin_part b
+on a.key=b.key) subq;
+
+
+explain extended
+select count(1) from (
+select
/*+mapjoin(a, c)*/ a.key as key, a.value as v1, b.value as v2
+from srcbucket_mapjoin a join srcbucket_mapjoin_part b
+on a.key=b.key join srcbucket_mapjoin_part c on b.key = c.key ) subq;
+
+select count(1) from (
+select /*+mapjoin(a, c)*/ a.key as key, a.value as v1, b.value as v2
+from srcbucket_mapjoin a join srcbucket_mapjoin_part b
+on a.key=b.key join srcbucket_mapjoin_part c on b.key = c.key ) subq;
+
+
+explain extended
+select count(1) from (
+select /*+mapjoin(a, c)*/ a.key as key, a.value as v1, b.value as v2
+from srcbucket_mapjoin a join srcbucket_mapjoin_part b
+on a.key=b.key join srcbucket_mapjoin_part c on b.key = c.key where a.key<100 and b.key<100) subq;
+
+select count(1) from (
+select /*+mapjoin(a, c)*/ a.key as key, a.value as v1, b.value as v2
+from srcbucket_mapjoin a join srcbucket_mapjoin_part b
+on a.key=b.key join srcbucket_mapjoin_part c on b.key = c.key where a.key<100 and b.key<100) subq;
+
+
+drop table srcbucket_mapjoin;
+drop table srcbucket_mapjoin_part;
+drop table srcbucket_mapjoin_part_2;
\ No newline at end of file
Index: ql/src/test/queries/clientpositive/smb_subquery.q
===================================================================
--- ql/src/test/queries/clientpositive/smb_subquery.q (revision 0)
+++ ql/src/test/queries/clientpositive/smb_subquery.q (revision 0)
@@ -0,0 +1,159 @@
+set hive.enforce.bucketing = true;
+set hive.enforce.sorting = true;
+
+drop table bucketmapjoin6_1;
+CREATE TABLE bucketmapjoin6_1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+
+drop table bucketmapjoin6_2;
+CREATE TABLE bucketmapjoin6_2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+
+drop table bucketmapjoin6_3;
+CREATE TABLE bucketmapjoin6_3(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+
+insert overwrite table bucketmapjoin6_1
+select * from src where key < 100 and key != 44 and key != 28 and key != 15;
+
+select * from bucketmapjoin6_1;
+
+insert overwrite table bucketmapjoin6_2
+select * from src where key > 20 and key < 50;
+
+select * from bucketmapjoin6_2;
+
+insert overwrite table bucketmapjoin6_3
+select * from src where key > 10 and key < 30;
+
+select * from bucketmapjoin6_3;
+
+explain
+select * from
+(
+select
+CASE
+WHEN M.key is not null THEN M.key
+ELSE S.key
+END as key,
+M.value as Mvalue, S.value as Svalue from
+(select * from bucketmapjoin6_1 where key <= 20 or key >= 50 ) M full outer join bucketmapjoin6_2 S on
+M.key = S.key
+) subq2
+full outer join
+bucketmapjoin6_3 L
+on subq2.key = L.key;
+
+select * from
+(
+select
+CASE
+WHEN M.key is not null THEN M.key
+ELSE S.key
+END as key,
+M.value as Mvalue, S.value as Svalue from
+(select * from bucketmapjoin6_1 where key <= 20 or key >= 50 ) M full outer join bucketmapjoin6_2 S on
+M.key = S.key
+) subq2
+full outer join
+bucketmapjoin6_3 L
+on subq2.key = L.key;
+
+
+
+set hive.optimize.bucketmapjoin = true;
+set hive.optimize.bucketmapjoin.sortedmerge = true;
+
+explain
+select /*+ MAPJOIN(L) */ * from
+(
+select /*+ MAPJOIN(S) */
+CASE
+WHEN M.key is not null THEN M.key
+ELSE S.key
+END as key,
+M.value as Mvalue, S.value as Svalue from
+(select * from bucketmapjoin6_1 where key <= 20 and key >= 50 ) M full outer join bucketmapjoin6_2 S on
+M.key = S.key
+) subq2
+full outer join
+bucketmapjoin6_3 L
+on subq2.key = L.key;
+
+select /*+ MAPJOIN(L) */ * from
+(
+select /*+ MAPJOIN(S) */
+CASE
+WHEN M.key is not null THEN M.key
+ELSE S.key
+END as key,
+M.value as Mvalue, S.value as Svalue from
+(select *
from bucketmapjoin6_1 where key <= 20 or key >= 50 ) M full outer join bucketmapjoin6_2 S on
+M.key = S.key
+) subq2
+full outer join
+bucketmapjoin6_3 L
+on subq2.key = L.key;
+
+
+explain
+select /*+ MAPJOIN(L) */ * from bucketmapjoin6_3 L
+full outer join
+(
+select /*+ MAPJOIN(S) */
+CASE
+WHEN M.key is not null THEN M.key
+ELSE S.key
+END as key,
+M.value as Mvalue, S.value as Svalue from
+(select * from bucketmapjoin6_1 where key <= 20 and key >= 50 ) M full outer join bucketmapjoin6_2 S on
+M.key = S.key
+) subq2
+on subq2.key = L.key;
+
+select /*+ MAPJOIN(L) */ * from bucketmapjoin6_3 L
+full outer join
+(
+select /*+ MAPJOIN(S) */
+CASE
+WHEN M.key is not null THEN M.key
+ELSE S.key
+END as key,
+M.value as Mvalue, S.value as Svalue from
+(select * from bucketmapjoin6_1 where key <= 20 and key >= 50 ) M full outer join bucketmapjoin6_2 S on
+M.key = S.key
+) subq2
+on subq2.key = L.key;
+
+
+set hive.optimize.bucketmapjoin = false;
+select * from
+(
+select
+CASE
+WHEN M.key is not null THEN M.key
+ELSE S.key
+END as key,
+M.value as Mvalue, S.value as Svalue from
+(select * from bucketmapjoin6_1 where key <= 20 or key >= 50 ) M full outer join bucketmapjoin6_2 S on
+M.key = S.key
+) subq2
+full outer join
+bucketmapjoin6_3 L
+on subq2.key = L.key;
+
+select * from
+(
+select
+CASE
+WHEN M.key is not null THEN M.key
+ELSE S.key
+END as key,
+M.value as Mvalue, S.value as Svalue from
+(select * from bucketmapjoin6_1 where key <= 20 or key >= 50 ) M full outer join bucketmapjoin6_2 S on
+M.key = S.key
+) subq2
+full outer join
+bucketmapjoin6_3 L
+on subq2.key = L.key;
+
+drop table bucketmapjoin6_3;
+drop table bucketmapjoin6_2;
+drop table bucketmapjoin6_1;
\ No newline at end of file
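The new smb_transform.q test that follows exercises the hive.optimize.bucketmapjoin.transform.trust switch it sets: a TRANSFORM (custom script) sits between the bucketed, sorted source table and the join, so the optimizer cannot verify that bucketing and sort order survive the script. With the flag on, the sorted merge bucket map join conversion is still attempted; with it off (second half of the test), it should be skipped. Below is a minimal sketch of how a rule might consult such a flag; only the property name comes from this patch, while the method, its arguments, and the default are assumptions.

import org.apache.hadoop.conf.Configuration;

public class TrustFlagSketch {
  // Hypothetical guard an optimizer rule could apply when it finds a
  // script (TRANSFORM) operator between the table scan and the join.
  static boolean mayTrustBucketingThroughTransform(Configuration conf) {
    // Assumed to default to false: do not trust bucketing across a script.
    return conf.getBoolean("hive.optimize.bucketmapjoin.transform.trust", false);
  }

  public static void main(String[] args) {
    Configuration conf = new Configuration(false);
    System.out.println(mayTrustBucketingThroughTransform(conf)); // false
    conf.setBoolean("hive.optimize.bucketmapjoin.transform.trust", true);
    System.out.println(mayTrustBucketingThroughTransform(conf)); // true
  }
}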
Index: ql/src/test/queries/clientpositive/smb_transform.q
===================================================================
--- ql/src/test/queries/clientpositive/smb_transform.q (revision 0)
+++ ql/src/test/queries/clientpositive/smb_transform.q (revision 0)
@@ -0,0 +1,82 @@
+set hive.enforce.bucketing = true;
+set hive.enforce.sorting = true;
+
+drop table bucketmapjoin6_1;
+CREATE TABLE bucketmapjoin6_1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+
+drop table bucketmapjoin6_2;
+CREATE TABLE bucketmapjoin6_2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+
+drop table bucketmapjoin6_3;
+CREATE TABLE bucketmapjoin6_3(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+
+insert overwrite table bucketmapjoin6_1
+select * from src where key < 100 and key != 44 and key != 28 and key != 15;
+
+insert overwrite table bucketmapjoin6_2
+select * from src where key > 20 and key < 50;
+
+insert overwrite table bucketmapjoin6_3
+select * from src where key > 10 and key < 30;
+
+set hive.optimize.bucketmapjoin = true;
+set hive.optimize.bucketmapjoin.sortedmerge = true;
+set hive.optimize.bucketmapjoin.transform.trust = true;
+
+explain
+select /*+ MAPJOIN(L) */ * from
+(
+select /*+ MAPJOIN(S) */
+S.key as key,
+M.value as Mvalue, S.value as Svalue from
+(select TRANSFORM(*) USING 'cat' from bucketmapjoin6_1 where key <= 20 and key >= 50 ) M full outer join bucketmapjoin6_2 S on
+M.key = S.key
+) subq2
+full outer join
+bucketmapjoin6_3 L
+on subq2.key = L.key;
+
+select /*+ MAPJOIN(L) */ * from
+(
+select /*+ MAPJOIN(S) */
+S.key as key,
+M.value as Mvalue, S.value as Svalue from
+(select TRANSFORM(*) USING 'cat' from bucketmapjoin6_1 where key <= 20 and key >= 50 ) M full outer join bucketmapjoin6_2 S on
+M.key = S.key
+) subq2
+full outer join
+bucketmapjoin6_3 L
+on subq2.key = L.key;
+
+set hive.optimize.bucketmapjoin.transform.trust = false;
+
+explain
+select /*+ MAPJOIN(L) */ * from
+(
+select /*+ MAPJOIN(S) */
+S.key as key,
+M.value as Mvalue, S.value as Svalue from
+(select TRANSFORM(*) USING 'cat' from bucketmapjoin6_1 where key <= 20 and key >= 50 ) M full outer join bucketmapjoin6_2 S on
+M.key = S.key
+) subq2
+full outer join
+bucketmapjoin6_3 L
+on subq2.key = L.key;
+
+select /*+ MAPJOIN(L) */ * from
+(
+select /*+ MAPJOIN(S) */
+S.key as key,
+M.value as Mvalue, S.value as Svalue from
+(select TRANSFORM(*) USING 'cat' from bucketmapjoin6_1 where key <= 20 and key >= 50 ) M full outer join bucketmapjoin6_2 S on
+M.key = S.key
+) subq2
+full outer join
+bucketmapjoin6_3 L
+on subq2.key = L.key;
+
+drop table bucketmapjoin6_3;
+drop table bucketmapjoin6_2;
+drop table bucketmapjoin6_1;
+
Index: ql/src/test/results/clientpositive/bucketmapjoin_subquery.q.out
===================================================================
--- ql/src/test/results/clientpositive/bucketmapjoin_subquery.q.out (revision 0)
+++ ql/src/test/results/clientpositive/bucketmapjoin_subquery.q.out (revision 0)
@@ -0,0 +1,3708 @@
+PREHOOK: query: drop table srcbucket_mapjoin
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table srcbucket_mapjoin
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table srcbucket_mapjoin_part
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table srcbucket_mapjoin_part
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table srcbucket_mapjoin_part_2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table srcbucket_mapjoin_part_2
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@srcbucket_mapjoin
+PREHOOK: query: load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin
+PREHOOK: type: LOAD
+POSTHOOK: query: load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin
+PREHOOK: query: load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin
+PREHOOK: type: LOAD
+POSTHOOK: query: load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin
+PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@srcbucket_mapjoin_part
+PREHOOK: query: load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08')
+PREHOOK: type: LOAD
+POSTHOOK: query: load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08
+PREHOOK: query: load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08')
+PREHOOK:
type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-09 +PREHOOK: query: load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-09 +PREHOOK: query: load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-09 +PREHOOK: query: load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-09 +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +PREHOOK: query: load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +PREHOOK: query: load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 
partition(ds='2008-04-08') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +PREHOOK: query: load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-09 +PREHOOK: query: load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-09 +PREHOOK: query: explain extended +select count(1) from ( +select /*+mapjoin(a)*/ a.key as key, a.value as v1, b.value as v2 +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key) subq +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select count(1) from ( +select /*+mapjoin(a)*/ a.key as key, a.value as v1, b.value as v2 +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key) subq +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF srcbucket_mapjoin a) (TOK_TABREF srcbucket_mapjoin_part b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) v1) (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL b) value) v2)))) subq)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + subq:b + TableScan + alias: b + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + Position of Big Table: 1 + File Output Operator + compressed: false + GlobalTableId: 0 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-11_11-57-39_803_2931597555732627538/10002 + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns + columns.types + escape.delim \ + TotalFiles: 1 + MultiFileSpray: false + Local Work: + Map Reduce Local Work + Alias -> Map Local Tables: + subq:a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + subq:a + TableScan + alias: a + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + Position of Big Table: 1 + File Output Operator + compressed: false + GlobalTableId: 0 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-11_11-57-39_803_2931597555732627538/10002 + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns + columns.types + escape.delim \ + TotalFiles: 1 + MultiFileSpray: false + Bucket Mapjoin Context: + Alias Bucket Base File Name Mapping: + subq:a {srcbucket20.txt=[srcbucket20.txt], srcbucket21.txt=[srcbucket21.txt], srcbucket22.txt=[srcbucket20.txt], srcbucket23.txt=[srcbucket21.txt], ds=2008-04-09/srcbucket20.txt=[srcbucket20.txt], ds=2008-04-09/srcbucket21.txt=[srcbucket21.txt], ds=2008-04-09/srcbucket22.txt=[srcbucket20.txt], ds=2008-04-09/srcbucket23.txt=[srcbucket21.txt]} + Alias Bucket File Name Mapping: + subq:a {file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket20.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket21.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket22.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket23.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt], 
file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09/srcbucket20.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09/srcbucket21.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09/srcbucket22.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09/srcbucket23.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt]} + Needs Tagging: false + Path -> Alias: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08 [subq:b] + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09 [subq:b] + Path -> Partition: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08 + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.types int:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part + name srcbucket_mapjoin_part + partition_columns ds + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1268337453 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.types int:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part + name srcbucket_mapjoin_part + partition_columns ds + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1268337453 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: srcbucket_mapjoin_part + name: srcbucket_mapjoin_part + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09 + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + 
ds 2008-04-09 + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.types int:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part + name srcbucket_mapjoin_part + partition_columns ds + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1268337453 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.types int:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part + name srcbucket_mapjoin_part + partition_columns ds + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1268337453 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: srcbucket_mapjoin_part + name: srcbucket_mapjoin_part + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-11_11-57-39_803_2931597555732627538/10002 + Select Operator + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count(1) + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint + Needs Tagging: false + Path -> Alias: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-11_11-57-39_803_2931597555732627538/10002 [file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-11_11-57-39_803_2931597555732627538/10002] + Path -> Partition: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-11_11-57-39_803_2931597555732627538/10002 + Partition + base file name: 10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns + columns.types + escape.delim \ + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns + columns.types + escape.delim \ + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: bigint + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-11_11-57-39_803_2931597555732627538/10001 + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns 
_col0 + columns.types bigint + serialization.format 1 + TotalFiles: 1 + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select count(1) from ( +select /*+mapjoin(a)*/ a.key as key, a.value as v1, b.value as v2 +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key) subq +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin +PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-09 +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-11_11-57-40_859_100378860743491241/10000 +POSTHOOK: query: select count(1) from ( +select /*+mapjoin(a)*/ a.key as key, a.value as v1, b.value as v2 +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key) subq +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin +POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-09 +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-11_11-57-40_859_100378860743491241/10000 +928 +PREHOOK: query: explain extended +select * from ( +select /*+mapjoin(a)*/ a.key as key, a.value as v1, b.value as v2 +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key) subq where v1 != '100' +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select * from ( +select /*+mapjoin(a)*/ a.key as key, a.value as v1, b.value as v2 +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key) subq where v1 != '100' +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF srcbucket_mapjoin a) (TOK_TABREF srcbucket_mapjoin_part b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) v1) (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL b) value) v2)))) subq)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (!= (TOK_TABLE_OR_COL v1) '100')))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + subq:b + TableScan + alias: b + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col1, _col3 + Position of Big Table: 1 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col3 + type: string + outputColumnNames: _col0, _col1, _col3 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col3 + type: string + outputColumnNames: _col0, _col1, _col2 + Filter Operator + isSamplingPred: false + predicate: + expr: (_col1 <> '100') + type: boolean + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col2 + type: string + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-11_11-58-01_245_6839618835307124871/10001 + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types int:string:string + serialization.format 1 + TotalFiles: 1 + MultiFileSpray: false + Local Work: + Map Reduce Local Work + Alias -> Map Local Tables: + subq:a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + subq:a + TableScan + alias: a + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col1, _col3 + Position of Big Table: 1 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col3 + type: string + outputColumnNames: _col0, _col1, _col3 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col3 + type: string + outputColumnNames: _col0, _col1, _col2 + Filter Operator + isSamplingPred: false + predicate: + expr: (_col1 <> '100') + type: boolean + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col2 + type: string + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-11_11-58-01_245_6839618835307124871/10001 + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types int:string:string + serialization.format 1 + TotalFiles: 1 + MultiFileSpray: false + Bucket Mapjoin Context: + Alias Bucket Base File Name Mapping: + subq:a {srcbucket20.txt=[srcbucket20.txt], srcbucket21.txt=[srcbucket21.txt], srcbucket22.txt=[srcbucket20.txt], srcbucket23.txt=[srcbucket21.txt], ds=2008-04-09/srcbucket20.txt=[srcbucket20.txt], ds=2008-04-09/srcbucket21.txt=[srcbucket21.txt], 
ds=2008-04-09/srcbucket22.txt=[srcbucket20.txt], ds=2008-04-09/srcbucket23.txt=[srcbucket21.txt]} + Alias Bucket File Name Mapping: + subq:a {file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket20.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket21.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket22.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket23.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09/srcbucket20.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09/srcbucket21.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09/srcbucket22.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09/srcbucket23.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt]} + Needs Tagging: false + Path -> Alias: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08 [subq:b] + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09 [subq:b] + Path -> Partition: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08 + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.types int:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part + name srcbucket_mapjoin_part + partition_columns ds + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1268337453 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.types int:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part + name srcbucket_mapjoin_part + partition_columns ds + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1268337453 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: srcbucket_mapjoin_part + name: srcbucket_mapjoin_part + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09 + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.types int:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part + name srcbucket_mapjoin_part + partition_columns ds + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1268337453 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.types int:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part + name srcbucket_mapjoin_part + partition_columns ds + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1268337453 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: srcbucket_mapjoin_part + name: srcbucket_mapjoin_part + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select * from ( +select /*+mapjoin(a)*/ a.key as key, a.value as v1, b.value as v2 +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key) subq where v1 != '100' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin +PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-09 +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-11_11-58-02_116_107478033053622597/10000 +POSTHOOK: query: select * from ( +select /*+mapjoin(a)*/ a.key as key, a.value as v1, b.value as v2 +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key) subq where v1 != '100' +POSTHOOK: type: QUERY 
+POSTHOOK: Input: default@srcbucket_mapjoin +POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-09 +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-11_11-58-02_116_107478033053622597/10000 +165 val_165 val_165 +165 val_165 val_165 +484 val_484 val_484 +150 val_150 val_150 +224 val_224 val_224 +224 val_224 val_224 +66 val_66 val_66 +213 val_213 val_213 +213 val_213 val_213 +374 val_374 val_374 +495 val_495 val_495 +37 val_37 val_37 +37 val_37 val_37 +327 val_327 val_327 +327 val_327 val_327 +327 val_327 val_327 +15 val_15 val_15 +15 val_15 val_15 +338 val_338 val_338 +459 val_459 val_459 +459 val_459 val_459 +466 val_466 val_466 +466 val_466 val_466 +466 val_466 val_466 +396 val_396 val_396 +396 val_396 val_396 +396 val_396 val_396 +309 val_309 val_309 +309 val_309 val_309 +367 val_367 val_367 +367 val_367 val_367 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +455 val_455 val_455 +316 val_316 val_316 +316 val_316 val_316 +316 val_316 val_316 +345 val_345 val_345 +129 val_129 val_129 +129 val_129 val_129 +378 val_378 val_378 +4 val_4 val_4 +356 val_356 val_356 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +125 val_125 val_125 +125 val_125 val_125 +437 val_437 val_437 +286 val_286 val_286 +187 val_187 val_187 +187 val_187 val_187 +187 val_187 val_187 +176 val_176 val_176 +176 val_176 val_176 +459 val_459 val_459 +459 val_459 val_459 +51 val_51 val_51 +51 val_51 val_51 +103 val_103 val_103 +103 val_103 val_103 +239 val_239 val_239 +239 val_239 val_239 +213 val_213 val_213 +213 val_213 val_213 +176 val_176 val_176 +176 val_176 val_176 +275 val_275 val_275 +260 val_260 val_260 +404 val_404 val_404 +404 val_404 val_404 +217 val_217 val_217 +217 val_217 val_217 +84 val_84 val_84 +84 val_84 val_84 +466 val_466 val_466 +466 val_466 val_466 +466 val_466 val_466 +8 val_8 val_8 +411 val_411 val_411 +172 val_172 val_172 +172 val_172 val_172 +129 val_129 val_129 +129 val_129 val_129 +158 val_158 val_158 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +26 val_26 val_26 +26 val_26 val_26 +165 val_165 val_165 +165 val_165 val_165 +327 val_327 val_327 +327 val_327 val_327 +327 val_327 val_327 +51 val_51 val_51 +51 val_51 val_51 +404 val_404 val_404 +404 val_404 val_404 +95 val_95 val_95 +95 val_95 val_95 +282 val_282 val_282 +282 val_282 val_282 +187 val_187 val_187 +187 val_187 val_187 +187 val_187 val_187 +316 val_316 val_316 +316 val_316 val_316 +316 val_316 val_316 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +77 val_77 val_77 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +118 val_118 val_118 +118 val_118 val_118 +282 val_282 val_282 +282 val_282 val_282 +419 val_419 val_419 +15 val_15 val_15 +15 val_15 val_15 +118 val_118 val_118 +118 val_118 val_118 +19 val_19 val_19 +224 val_224 val_224 +224 val_224 val_224 +309 val_309 val_309 +309 val_309 val_309 +389 val_389 val_389 +327 val_327 val_327 +327 val_327 val_327 +327 val_327 val_327 +242 val_242 val_242 +242 val_242 val_242 +392 val_392 val_392 +242 val_242 val_242 +242 val_242 val_242 +396 val_396 val_396 +396 val_396 val_396 +396 val_396 val_396 +95 val_95 val_95 +95 val_95 val_95 +11 val_11 val_11 +143 val_143 val_143 +228 val_228 val_228 +33 val_33 val_33 +103 val_103 val_103 +103 val_103 val_103 +367 val_367 val_367 +367 val_367 val_367 +239 val_239 val_239 +239 val_239 val_239 +480 val_480 val_480 +480 val_480 val_480 +480 val_480 val_480 +202 val_202 
val_202 +316 val_316 val_316 +316 val_316 val_316 +316 val_316 val_316 +235 val_235 val_235 +80 val_80 val_80 +44 val_44 val_44 +466 val_466 val_466 +466 val_466 val_466 +466 val_466 val_466 +257 val_257 val_257 +190 val_190 val_190 +114 val_114 val_114 +396 val_396 val_396 +396 val_396 val_396 +396 val_396 val_396 +217 val_217 val_217 +217 val_217 val_217 +125 val_125 val_125 +125 val_125 val_125 +187 val_187 val_187 +187 val_187 val_187 +187 val_187 val_187 +480 val_480 val_480 +480 val_480 val_480 +480 val_480 val_480 +491 val_491 val_491 +305 val_305 val_305 +444 val_444 val_444 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +323 val_323 val_323 +480 val_480 val_480 +480 val_480 val_480 +480 val_480 val_480 +136 val_136 val_136 +172 val_172 val_172 +172 val_172 val_172 +462 val_462 val_462 +462 val_462 val_462 +26 val_26 val_26 +26 val_26 val_26 +462 val_462 val_462 +462 val_462 val_462 +341 val_341 val_341 +183 val_183 val_183 +84 val_84 val_84 +84 val_84 val_84 +37 val_37 val_37 +37 val_37 val_37 +448 val_448 val_448 +194 val_194 val_194 +477 val_477 val_477 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +400 val_400 val_400 +27 val_27 val_27 +409 val_409 val_409 +409 val_409 val_409 +409 val_409 val_409 +265 val_265 val_265 +265 val_265 val_265 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +166 val_166 val_166 +430 val_430 val_430 +430 val_430 val_430 +430 val_430 val_430 +287 val_287 val_287 +207 val_207 val_207 +207 val_207 val_207 +199 val_199 val_199 +199 val_199 val_199 +199 val_199 val_199 +247 val_247 val_247 +489 val_489 val_489 +489 val_489 val_489 +489 val_489 val_489 +489 val_489 val_489 +162 val_162 val_162 +397 val_397 val_397 +397 val_397 val_397 +342 val_342 val_342 +342 val_342 val_342 +195 val_195 val_195 +195 val_195 val_195 +155 val_155 val_155 +203 val_203 val_203 +203 val_203 val_203 +339 val_339 val_339 +302 val_302 val_302 +438 val_438 val_438 +438 val_438 val_438 +438 val_438 val_438 +489 val_489 val_489 +489 val_489 val_489 +489 val_489 val_489 +489 val_489 val_489 +221 val_221 val_221 +221 val_221 val_221 +92 val_92 val_92 +111 val_111 val_111 +427 val_427 val_427 +382 val_382 val_382 +382 val_382 val_382 +386 val_386 val_386 +430 val_430 val_430 +430 val_430 val_430 +430 val_430 val_430 +221 val_221 val_221 +221 val_221 val_221 +137 val_137 val_137 +137 val_137 val_137 +180 val_180 val_180 +12 val_12 val_12 +12 val_12 val_12 +67 val_67 val_67 +67 val_67 val_67 +489 val_489 val_489 +489 val_489 val_489 +489 val_489 val_489 +489 val_489 val_489 +353 val_353 val_353 +353 val_353 val_353 +272 val_272 val_272 +272 val_272 val_272 +463 val_463 val_463 +463 val_463 val_463 +119 val_119 val_119 +119 val_119 val_119 +119 val_119 val_119 +496 val_496 val_496 +393 val_393 val_393 +100 val_100 val_100 +100 val_100 val_100 +298 val_298 val_298 +298 val_298 val_298 +298 val_298 val_298 +199 val_199 val_199 +199 val_199 val_199 +199 val_199 val_199 +191 val_191 val_191 +191 val_191 val_191 +96 val_96 val_96 +481 val_481 val_481 +409 val_409 val_409 +409 val_409 val_409 +409 val_409 val_409 +470 val_470 val_470 +137 val_137 val_137 +137 val_137 val_137 +85 val_85 val_85 +364 val_364 val_364 +306 val_306 val_306 +272 val_272 val_272 +272 val_272 val_272 +331 val_331 val_331 +331 val_331 val_331 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +452 val_452 val_452 +177 val_177 val_177 +5 
val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +317 val_317 val_317 +317 val_317 val_317 +34 val_34 val_34 +229 val_229 val_229 +229 val_229 val_229 +195 val_195 val_195 +195 val_195 val_195 +430 val_430 val_430 +430 val_430 val_430 +430 val_430 val_430 +119 val_119 val_119 +119 val_119 val_119 +119 val_119 val_119 +489 val_489 val_489 +489 val_489 val_489 +489 val_489 val_489 +489 val_489 val_489 +78 val_78 val_78 +41 val_41 val_41 +492 val_492 val_492 +492 val_492 val_492 +449 val_449 val_449 +218 val_218 val_218 +30 val_30 val_30 +74 val_74 val_74 +342 val_342 val_342 +342 val_342 val_342 +368 val_368 val_368 +485 val_485 val_485 +70 val_70 val_70 +70 val_70 val_70 +70 val_70 val_70 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +191 val_191 val_191 +191 val_191 val_191 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +438 val_438 val_438 +438 val_438 val_438 +438 val_438 val_438 +467 val_467 val_467 +229 val_229 val_229 +229 val_229 val_229 +463 val_463 val_463 +463 val_463 val_463 +283 val_283 val_283 +331 val_331 val_331 +331 val_331 val_331 +335 val_335 val_335 +104 val_104 val_104 +104 val_104 val_104 +409 val_409 val_409 +409 val_409 val_409 +409 val_409 val_409 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +258 val_258 val_258 +203 val_203 val_203 +203 val_203 val_203 +12 val_12 val_12 +12 val_12 val_12 +478 val_478 val_478 +478 val_478 val_478 +298 val_298 val_298 +298 val_298 val_298 +298 val_298 val_298 +382 val_382 val_382 +382 val_382 val_382 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +70 val_70 val_70 +70 val_70 val_70 +70 val_70 val_70 +397 val_397 val_397 +397 val_397 val_397 +104 val_104 val_104 +104 val_104 val_104 +70 val_70 val_70 +70 val_70 val_70 +70 val_70 val_70 +438 val_438 val_438 +438 val_438 val_438 +438 val_438 val_438 +119 val_119 val_119 +119 val_119 val_119 +119 val_119 val_119 +360 val_360 val_360 +199 val_199 val_199 +199 val_199 val_199 +199 val_199 val_199 +478 val_478 val_478 +478 val_478 val_478 +317 val_317 val_317 +317 val_317 val_317 +207 val_207 val_207 +207 val_207 val_207 +265 val_265 val_265 +265 val_265 val_265 +353 val_353 val_353 +353 val_353 val_353 +214 val_214 val_214 +133 val_133 val_133 +375 val_375 val_375 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +67 val_67 val_67 +67 val_67 val_67 +379 val_379 val_379 +492 val_492 val_492 +492 val_492 val_492 +100 val_100 val_100 +100 val_100 val_100 +298 val_298 val_298 +298 val_298 val_298 +298 val_298 val_298 +9 val_9 val_9 +126 val_126 val_126 +165 val_165 val_165 +165 val_165 val_165 +484 val_484 val_484 +150 val_150 val_150 +224 val_224 val_224 +224 val_224 val_224 +66 val_66 val_66 +213 val_213 val_213 +213 val_213 val_213 +374 val_374 val_374 +495 val_495 val_495 +37 val_37 val_37 +37 val_37 val_37 +327 val_327 val_327 +327 val_327 val_327 +327 val_327 val_327 +15 val_15 val_15 +15 val_15 val_15 +338 val_338 val_338 +459 val_459 val_459 +459 val_459 val_459 +466 val_466 val_466 +466 val_466 val_466 +466 val_466 val_466 +396 val_396 val_396 +396 val_396 val_396 +396 val_396 val_396 +309 val_309 val_309 +309 val_309 val_309 +367 val_367 val_367 +367 val_367 val_367 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +455 val_455 val_455 +316 val_316 val_316 +316 val_316 val_316 +316 val_316 val_316 +345 val_345 val_345 +129 val_129 val_129 +129 val_129 val_129 +378 val_378 val_378 +4 val_4 val_4 +356 val_356 val_356 +169 val_169 
val_169 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +125 val_125 val_125 +125 val_125 val_125 +437 val_437 val_437 +286 val_286 val_286 +187 val_187 val_187 +187 val_187 val_187 +187 val_187 val_187 +176 val_176 val_176 +176 val_176 val_176 +459 val_459 val_459 +459 val_459 val_459 +51 val_51 val_51 +51 val_51 val_51 +103 val_103 val_103 +103 val_103 val_103 +239 val_239 val_239 +239 val_239 val_239 +213 val_213 val_213 +213 val_213 val_213 +176 val_176 val_176 +176 val_176 val_176 +275 val_275 val_275 +260 val_260 val_260 +404 val_404 val_404 +404 val_404 val_404 +217 val_217 val_217 +217 val_217 val_217 +84 val_84 val_84 +84 val_84 val_84 +466 val_466 val_466 +466 val_466 val_466 +466 val_466 val_466 +8 val_8 val_8 +411 val_411 val_411 +172 val_172 val_172 +172 val_172 val_172 +129 val_129 val_129 +129 val_129 val_129 +158 val_158 val_158 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +26 val_26 val_26 +26 val_26 val_26 +165 val_165 val_165 +165 val_165 val_165 +327 val_327 val_327 +327 val_327 val_327 +327 val_327 val_327 +51 val_51 val_51 +51 val_51 val_51 +404 val_404 val_404 +404 val_404 val_404 +95 val_95 val_95 +95 val_95 val_95 +282 val_282 val_282 +282 val_282 val_282 +187 val_187 val_187 +187 val_187 val_187 +187 val_187 val_187 +316 val_316 val_316 +316 val_316 val_316 +316 val_316 val_316 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +77 val_77 val_77 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +118 val_118 val_118 +118 val_118 val_118 +282 val_282 val_282 +282 val_282 val_282 +419 val_419 val_419 +15 val_15 val_15 +15 val_15 val_15 +118 val_118 val_118 +118 val_118 val_118 +19 val_19 val_19 +224 val_224 val_224 +224 val_224 val_224 +309 val_309 val_309 +309 val_309 val_309 +389 val_389 val_389 +327 val_327 val_327 +327 val_327 val_327 +327 val_327 val_327 +242 val_242 val_242 +242 val_242 val_242 +392 val_392 val_392 +242 val_242 val_242 +242 val_242 val_242 +396 val_396 val_396 +396 val_396 val_396 +396 val_396 val_396 +95 val_95 val_95 +95 val_95 val_95 +11 val_11 val_11 +143 val_143 val_143 +228 val_228 val_228 +33 val_33 val_33 +103 val_103 val_103 +103 val_103 val_103 +367 val_367 val_367 +367 val_367 val_367 +239 val_239 val_239 +239 val_239 val_239 +480 val_480 val_480 +480 val_480 val_480 +480 val_480 val_480 +202 val_202 val_202 +316 val_316 val_316 +316 val_316 val_316 +316 val_316 val_316 +235 val_235 val_235 +80 val_80 val_80 +44 val_44 val_44 +466 val_466 val_466 +466 val_466 val_466 +466 val_466 val_466 +257 val_257 val_257 +190 val_190 val_190 +114 val_114 val_114 +396 val_396 val_396 +396 val_396 val_396 +396 val_396 val_396 +217 val_217 val_217 +217 val_217 val_217 +125 val_125 val_125 +125 val_125 val_125 +187 val_187 val_187 +187 val_187 val_187 +187 val_187 val_187 +480 val_480 val_480 +480 val_480 val_480 +480 val_480 val_480 +491 val_491 val_491 +305 val_305 val_305 +444 val_444 val_444 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +323 val_323 val_323 +480 val_480 val_480 +480 val_480 val_480 +480 val_480 val_480 +136 val_136 val_136 +172 val_172 val_172 +172 val_172 val_172 +462 val_462 val_462 +462 val_462 val_462 +26 val_26 val_26 +26 val_26 val_26 +462 val_462 val_462 +462 val_462 val_462 +341 val_341 val_341 +183 val_183 val_183 +84 val_84 val_84 +84 val_84 val_84 +37 val_37 val_37 +37 val_37 val_37 +448 val_448 val_448 +194 val_194 val_194 +477 val_477 val_477 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +400 val_400 
val_400 +27 val_27 val_27 +409 val_409 val_409 +409 val_409 val_409 +409 val_409 val_409 +265 val_265 val_265 +265 val_265 val_265 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +166 val_166 val_166 +430 val_430 val_430 +430 val_430 val_430 +430 val_430 val_430 +287 val_287 val_287 +207 val_207 val_207 +207 val_207 val_207 +199 val_199 val_199 +199 val_199 val_199 +199 val_199 val_199 +247 val_247 val_247 +489 val_489 val_489 +489 val_489 val_489 +489 val_489 val_489 +489 val_489 val_489 +162 val_162 val_162 +397 val_397 val_397 +397 val_397 val_397 +342 val_342 val_342 +342 val_342 val_342 +195 val_195 val_195 +195 val_195 val_195 +155 val_155 val_155 +203 val_203 val_203 +203 val_203 val_203 +339 val_339 val_339 +302 val_302 val_302 +438 val_438 val_438 +438 val_438 val_438 +438 val_438 val_438 +489 val_489 val_489 +489 val_489 val_489 +489 val_489 val_489 +489 val_489 val_489 +221 val_221 val_221 +221 val_221 val_221 +92 val_92 val_92 +111 val_111 val_111 +427 val_427 val_427 +382 val_382 val_382 +382 val_382 val_382 +386 val_386 val_386 +430 val_430 val_430 +430 val_430 val_430 +430 val_430 val_430 +221 val_221 val_221 +221 val_221 val_221 +137 val_137 val_137 +137 val_137 val_137 +180 val_180 val_180 +12 val_12 val_12 +12 val_12 val_12 +67 val_67 val_67 +67 val_67 val_67 +489 val_489 val_489 +489 val_489 val_489 +489 val_489 val_489 +489 val_489 val_489 +353 val_353 val_353 +353 val_353 val_353 +272 val_272 val_272 +272 val_272 val_272 +463 val_463 val_463 +463 val_463 val_463 +119 val_119 val_119 +119 val_119 val_119 +119 val_119 val_119 +496 val_496 val_496 +393 val_393 val_393 +100 val_100 val_100 +100 val_100 val_100 +298 val_298 val_298 +298 val_298 val_298 +298 val_298 val_298 +199 val_199 val_199 +199 val_199 val_199 +199 val_199 val_199 +191 val_191 val_191 +191 val_191 val_191 +96 val_96 val_96 +481 val_481 val_481 +409 val_409 val_409 +409 val_409 val_409 +409 val_409 val_409 +470 val_470 val_470 +137 val_137 val_137 +137 val_137 val_137 +85 val_85 val_85 +364 val_364 val_364 +306 val_306 val_306 +272 val_272 val_272 +272 val_272 val_272 +331 val_331 val_331 +331 val_331 val_331 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +452 val_452 val_452 +177 val_177 val_177 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +317 val_317 val_317 +317 val_317 val_317 +34 val_34 val_34 +229 val_229 val_229 +229 val_229 val_229 +195 val_195 val_195 +195 val_195 val_195 +430 val_430 val_430 +430 val_430 val_430 +430 val_430 val_430 +119 val_119 val_119 +119 val_119 val_119 +119 val_119 val_119 +489 val_489 val_489 +489 val_489 val_489 +489 val_489 val_489 +489 val_489 val_489 +78 val_78 val_78 +41 val_41 val_41 +492 val_492 val_492 +492 val_492 val_492 +449 val_449 val_449 +218 val_218 val_218 +30 val_30 val_30 +74 val_74 val_74 +342 val_342 val_342 +342 val_342 val_342 +368 val_368 val_368 +485 val_485 val_485 +70 val_70 val_70 +70 val_70 val_70 +70 val_70 val_70 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +191 val_191 val_191 +191 val_191 val_191 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +438 val_438 val_438 +438 val_438 val_438 +438 val_438 val_438 +467 val_467 val_467 +229 val_229 val_229 +229 val_229 val_229 +463 val_463 val_463 +463 val_463 val_463 +283 val_283 val_283 +331 val_331 val_331 +331 val_331 val_331 +335 val_335 val_335 +104 val_104 val_104 +104 val_104 val_104 +409 val_409 val_409 +409 val_409 
val_409 +409 val_409 val_409 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +258 val_258 val_258 +203 val_203 val_203 +203 val_203 val_203 +12 val_12 val_12 +12 val_12 val_12 +478 val_478 val_478 +478 val_478 val_478 +298 val_298 val_298 +298 val_298 val_298 +298 val_298 val_298 +382 val_382 val_382 +382 val_382 val_382 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +70 val_70 val_70 +70 val_70 val_70 +70 val_70 val_70 +397 val_397 val_397 +397 val_397 val_397 +104 val_104 val_104 +104 val_104 val_104 +70 val_70 val_70 +70 val_70 val_70 +70 val_70 val_70 +438 val_438 val_438 +438 val_438 val_438 +438 val_438 val_438 +119 val_119 val_119 +119 val_119 val_119 +119 val_119 val_119 +360 val_360 val_360 +199 val_199 val_199 +199 val_199 val_199 +199 val_199 val_199 +478 val_478 val_478 +478 val_478 val_478 +317 val_317 val_317 +317 val_317 val_317 +207 val_207 val_207 +207 val_207 val_207 +265 val_265 val_265 +265 val_265 val_265 +353 val_353 val_353 +353 val_353 val_353 +214 val_214 val_214 +133 val_133 val_133 +375 val_375 val_375 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +67 val_67 val_67 +67 val_67 val_67 +379 val_379 val_379 +492 val_492 val_492 +492 val_492 val_492 +100 val_100 val_100 +100 val_100 val_100 +298 val_298 val_298 +298 val_298 val_298 +298 val_298 val_298 +9 val_9 val_9 +126 val_126 val_126 +PREHOOK: query: explain extended +select /*+mapjoin(a)*/ a.key as key, a.value as v1, b.value as v2 +from +(select * from srcbucket_mapjoin where value != '100') a +join srcbucket_mapjoin_part b +on a.key=b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select /*+mapjoin(a)*/ a.key as key, a.value as v1, b.value as v2 +from +(select * from srcbucket_mapjoin where value != '100') a +join srcbucket_mapjoin_part b +on a.key=b.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF srcbucket_mapjoin)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (!= (TOK_TABLE_OR_COL value) '100')))) a) (TOK_TABREF srcbucket_mapjoin_part b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) v1) (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL b) value) v2))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        b 
+          TableScan
+            alias: b
+            Common Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {_col0} {_col1}
+                1 {value}
+              handleSkewJoin: false
+              keys:
+                0 [Column[_col0]]
+                1 [Column[key]]
+              outputColumnNames: _col0, _col1, _col3
+              Position of Big Table: 1
+              Select Operator
+                expressions:
+                      expr: _col0
+                      type: int
+                      expr: _col1
+                      type: string
+                      expr: _col3
+                      type: string
+                outputColumnNames: _col0, _col1, _col3
+                Select Operator
+                  expressions:
+                        expr: _col0
+                        type: int
+                        expr: _col1
+                        type: string
+                        expr: _col3
+                        type: string
+                  outputColumnNames: _col0, _col1, _col2
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+                    directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-11_11-58-11_686_2591402424664763165/10001
+                    NumFilesPerFileSink: 1
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        properties:
+                          columns _col0,_col1,_col2
+                          columns.types int:string:string
+                          serialization.format 1
+                    TotalFiles: 1
+                    MultiFileSpray: false
+      Local Work:
+        Map Reduce Local Work
+          Alias -> Map Local Tables:
+            a:srcbucket_mapjoin 
+              Fetch Operator
+                limit: -1
+          Alias -> Map Local Operator Tree:
+            a:srcbucket_mapjoin 
+              TableScan
+                alias: srcbucket_mapjoin
+                Filter Operator
+                  isSamplingPred: false
+                  predicate:
+                      expr: (value <> '100')
+                      type: boolean
+                  Filter Operator
+                    isSamplingPred: false
+                    predicate:
+                        expr: (value <> '100')
+                        type: boolean
+                    Select Operator
+                      expressions:
+                            expr: key
+                            type: int
+                            expr: value
+                            type: string
+                      outputColumnNames: _col0, _col1
+                      Common Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        condition expressions:
+                          0 {_col0} {_col1}
+                          1 {value}
+                        handleSkewJoin: false
+                        keys:
+                          0 [Column[_col0]]
+                          1 [Column[key]]
+                        outputColumnNames: _col0, _col1, _col3
+                        Position of Big Table: 1
+                        Select Operator
+                          expressions:
+                                expr: _col0
+                                type: int
+                                expr: _col1
+                                type: string
+                                expr: _col3
+                                type: string
+                          outputColumnNames: _col0, _col1, _col3
+                          Select Operator
+                            expressions:
+                                  expr: _col0
+                                  type: int
+                                  expr: _col1
+                                  type: string
+                                  expr: _col3
+                                  type: string
+                            outputColumnNames: _col0, _col1, _col2
+                            File Output Operator
+                              compressed: false
+                              GlobalTableId: 0
+                              directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-11_11-58-11_686_2591402424664763165/10001
+                              NumFilesPerFileSink: 1
+                              table:
+                                  input format: org.apache.hadoop.mapred.TextInputFormat
+                                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                                  properties:
+                                    columns _col0,_col1,_col2
+                                    columns.types int:string:string
+                                    serialization.format 1
+                              TotalFiles: 1
+                              MultiFileSpray: false
+          Bucket Mapjoin Context:
+              Alias Bucket Base File Name Mapping:
+                a:srcbucket_mapjoin {srcbucket20.txt=[srcbucket20.txt], srcbucket21.txt=[srcbucket21.txt], srcbucket22.txt=[srcbucket20.txt], srcbucket23.txt=[srcbucket21.txt], ds=2008-04-09/srcbucket20.txt=[srcbucket20.txt], ds=2008-04-09/srcbucket21.txt=[srcbucket21.txt], ds=2008-04-09/srcbucket22.txt=[srcbucket20.txt], ds=2008-04-09/srcbucket23.txt=[srcbucket21.txt]}
+              Alias Bucket File Name Mapping:
+                a:srcbucket_mapjoin {file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket20.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket21.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket22.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket23.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09/srcbucket20.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09/srcbucket21.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09/srcbucket22.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09/srcbucket23.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt]}
+      Needs Tagging: false
+      Path -> Alias:
+        file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08 [b]
+        file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09 [b]
+      Path -> Partition:
+        file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08 
+          Partition
+            base file name: ds=2008-04-08
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2008-04-08
+            properties:
+              bucket_count 4
+              bucket_field_name key
+              columns key,value
+              columns.types int:string
+              file.inputformat org.apache.hadoop.mapred.TextInputFormat
+              file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part
+              name srcbucket_mapjoin_part
+              partition_columns ds
+              serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              transient_lastDdlTime 1268337453
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count 4
+                bucket_field_name key
+                columns key,value
+                columns.types int:string
+                file.inputformat org.apache.hadoop.mapred.TextInputFormat
+                file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part
+                name srcbucket_mapjoin_part
+                partition_columns ds
+                serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                transient_lastDdlTime 1268337453
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: srcbucket_mapjoin_part
+            name: srcbucket_mapjoin_part
+        file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09 
+          Partition
+            base file name: ds=2008-04-09
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2008-04-09
+            properties:
+              bucket_count 4
+              bucket_field_name key
+              columns key,value
+              columns.types int:string
+              file.inputformat org.apache.hadoop.mapred.TextInputFormat
+              file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part
+              name srcbucket_mapjoin_part
+              partition_columns ds
+              serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              transient_lastDdlTime 1268337453
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count 4
+                bucket_field_name key
+                columns key,value
+                columns.types int:string
+                file.inputformat org.apache.hadoop.mapred.TextInputFormat
+                file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part
+                name srcbucket_mapjoin_part
+                partition_columns ds
+                serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                transient_lastDdlTime 1268337453
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: srcbucket_mapjoin_part
+            name: srcbucket_mapjoin_part
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
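[Note] The plan above differs from a plain map join only in the Bucket Mapjoin Context inside the Local Work: each bucket file of the big table b is paired with the small-table bucket files whose bucket number matches modulo the smaller bucket count. srcbucket_mapjoin_part is declared with bucket_count 4 while only srcbucket20.txt and srcbucket21.txt appear on the small side, so srcbucket22.txt maps back to srcbucket20.txt, as the base-file-name mapping shows. A minimal sketch of how this case is driven, assuming the bucketed test tables created earlier in this file (illustrative only, not part of the patch):

    -- hive.optimize.bucketmapjoin is the standard optimizer flag;
    -- the tables are the bucketed srcbucket_mapjoin and
    -- srcbucket_mapjoin_part populated earlier in this test.
    set hive.optimize.bucketmapjoin = true;
    explain extended
    select /*+mapjoin(a)*/ a.key as key, a.value as v1, b.value as v2
    from (select * from srcbucket_mapjoin where value != '100') a
    join srcbucket_mapjoin_part b on a.key = b.key;

With the flag off, the same query compiles to the same map-join plan minus the bucket context, so every mapper would load all of a's files instead of one bucket's worth.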
+PREHOOK: query: select /*+mapjoin(a)*/ a.key as key, a.value as v1, b.value as v2
+from
+(select * from srcbucket_mapjoin where value != '100') a
+join srcbucket_mapjoin_part b
+on a.key=b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08
+PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-09
+PREHOOK: Input: default@srcbucket_mapjoin
+PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-11_11-58-12_273_1548092167145872326/10000
+POSTHOOK: query: select /*+mapjoin(a)*/ a.key as key, a.value as v1, b.value as v2
+from
+(select * from srcbucket_mapjoin where value != '100') a
+join srcbucket_mapjoin_part b
+on a.key=b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08
+POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-09
+POSTHOOK: Input:
default@srcbucket_mapjoin +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-11_11-58-12_273_1548092167145872326/10000 +165 val_165 val_165 +165 val_165 val_165 +484 val_484 val_484 +150 val_150 val_150 +224 val_224 val_224 +224 val_224 val_224 +66 val_66 val_66 +213 val_213 val_213 +213 val_213 val_213 +374 val_374 val_374 +495 val_495 val_495 +37 val_37 val_37 +37 val_37 val_37 +327 val_327 val_327 +327 val_327 val_327 +327 val_327 val_327 +15 val_15 val_15 +15 val_15 val_15 +338 val_338 val_338 +459 val_459 val_459 +459 val_459 val_459 +466 val_466 val_466 +466 val_466 val_466 +466 val_466 val_466 +396 val_396 val_396 +396 val_396 val_396 +396 val_396 val_396 +309 val_309 val_309 +309 val_309 val_309 +367 val_367 val_367 +367 val_367 val_367 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +455 val_455 val_455 +316 val_316 val_316 +316 val_316 val_316 +316 val_316 val_316 +345 val_345 val_345 +129 val_129 val_129 +129 val_129 val_129 +378 val_378 val_378 +4 val_4 val_4 +356 val_356 val_356 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +125 val_125 val_125 +125 val_125 val_125 +437 val_437 val_437 +286 val_286 val_286 +187 val_187 val_187 +187 val_187 val_187 +187 val_187 val_187 +176 val_176 val_176 +176 val_176 val_176 +459 val_459 val_459 +459 val_459 val_459 +51 val_51 val_51 +51 val_51 val_51 +103 val_103 val_103 +103 val_103 val_103 +239 val_239 val_239 +239 val_239 val_239 +213 val_213 val_213 +213 val_213 val_213 +176 val_176 val_176 +176 val_176 val_176 +275 val_275 val_275 +260 val_260 val_260 +404 val_404 val_404 +404 val_404 val_404 +217 val_217 val_217 +217 val_217 val_217 +84 val_84 val_84 +84 val_84 val_84 +466 val_466 val_466 +466 val_466 val_466 +466 val_466 val_466 +8 val_8 val_8 +411 val_411 val_411 +172 val_172 val_172 +172 val_172 val_172 +129 val_129 val_129 +129 val_129 val_129 +158 val_158 val_158 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +26 val_26 val_26 +26 val_26 val_26 +165 val_165 val_165 +165 val_165 val_165 +327 val_327 val_327 +327 val_327 val_327 +327 val_327 val_327 +51 val_51 val_51 +51 val_51 val_51 +404 val_404 val_404 +404 val_404 val_404 +95 val_95 val_95 +95 val_95 val_95 +282 val_282 val_282 +282 val_282 val_282 +187 val_187 val_187 +187 val_187 val_187 +187 val_187 val_187 +316 val_316 val_316 +316 val_316 val_316 +316 val_316 val_316 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +77 val_77 val_77 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +118 val_118 val_118 +118 val_118 val_118 +282 val_282 val_282 +282 val_282 val_282 +419 val_419 val_419 +15 val_15 val_15 +15 val_15 val_15 +118 val_118 val_118 +118 val_118 val_118 +19 val_19 val_19 +224 val_224 val_224 +224 val_224 val_224 +309 val_309 val_309 +309 val_309 val_309 +389 val_389 val_389 +327 val_327 val_327 +327 val_327 val_327 +327 val_327 val_327 +242 val_242 val_242 +242 val_242 val_242 +392 val_392 val_392 +242 val_242 val_242 +242 val_242 val_242 +396 val_396 val_396 +396 val_396 val_396 +396 val_396 val_396 +95 val_95 val_95 +95 val_95 val_95 +11 val_11 val_11 +143 val_143 val_143 +228 val_228 val_228 +33 val_33 val_33 +103 val_103 val_103 +103 val_103 val_103 +367 val_367 val_367 +367 val_367 val_367 +239 val_239 val_239 +239 val_239 val_239 +480 val_480 val_480 +480 val_480 val_480 +480 val_480 val_480 +202 val_202 val_202 +316 val_316 val_316 +316 val_316 val_316 +316 val_316 val_316 +235 val_235 val_235 +80 val_80 val_80 +44 val_44 val_44 +466 val_466 
val_466 +466 val_466 val_466 +466 val_466 val_466 +257 val_257 val_257 +190 val_190 val_190 +114 val_114 val_114 +396 val_396 val_396 +396 val_396 val_396 +396 val_396 val_396 +217 val_217 val_217 +217 val_217 val_217 +125 val_125 val_125 +125 val_125 val_125 +187 val_187 val_187 +187 val_187 val_187 +187 val_187 val_187 +480 val_480 val_480 +480 val_480 val_480 +480 val_480 val_480 +491 val_491 val_491 +305 val_305 val_305 +444 val_444 val_444 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +323 val_323 val_323 +480 val_480 val_480 +480 val_480 val_480 +480 val_480 val_480 +136 val_136 val_136 +172 val_172 val_172 +172 val_172 val_172 +462 val_462 val_462 +462 val_462 val_462 +26 val_26 val_26 +26 val_26 val_26 +462 val_462 val_462 +462 val_462 val_462 +341 val_341 val_341 +183 val_183 val_183 +84 val_84 val_84 +84 val_84 val_84 +37 val_37 val_37 +37 val_37 val_37 +448 val_448 val_448 +194 val_194 val_194 +477 val_477 val_477 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +400 val_400 val_400 +27 val_27 val_27 +409 val_409 val_409 +409 val_409 val_409 +409 val_409 val_409 +265 val_265 val_265 +265 val_265 val_265 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +166 val_166 val_166 +430 val_430 val_430 +430 val_430 val_430 +430 val_430 val_430 +287 val_287 val_287 +207 val_207 val_207 +207 val_207 val_207 +199 val_199 val_199 +199 val_199 val_199 +199 val_199 val_199 +247 val_247 val_247 +489 val_489 val_489 +489 val_489 val_489 +489 val_489 val_489 +489 val_489 val_489 +162 val_162 val_162 +397 val_397 val_397 +397 val_397 val_397 +342 val_342 val_342 +342 val_342 val_342 +195 val_195 val_195 +195 val_195 val_195 +155 val_155 val_155 +203 val_203 val_203 +203 val_203 val_203 +339 val_339 val_339 +302 val_302 val_302 +438 val_438 val_438 +438 val_438 val_438 +438 val_438 val_438 +489 val_489 val_489 +489 val_489 val_489 +489 val_489 val_489 +489 val_489 val_489 +221 val_221 val_221 +221 val_221 val_221 +92 val_92 val_92 +111 val_111 val_111 +427 val_427 val_427 +382 val_382 val_382 +382 val_382 val_382 +386 val_386 val_386 +430 val_430 val_430 +430 val_430 val_430 +430 val_430 val_430 +221 val_221 val_221 +221 val_221 val_221 +137 val_137 val_137 +137 val_137 val_137 +180 val_180 val_180 +12 val_12 val_12 +12 val_12 val_12 +67 val_67 val_67 +67 val_67 val_67 +489 val_489 val_489 +489 val_489 val_489 +489 val_489 val_489 +489 val_489 val_489 +353 val_353 val_353 +353 val_353 val_353 +272 val_272 val_272 +272 val_272 val_272 +463 val_463 val_463 +463 val_463 val_463 +119 val_119 val_119 +119 val_119 val_119 +119 val_119 val_119 +496 val_496 val_496 +393 val_393 val_393 +100 val_100 val_100 +100 val_100 val_100 +298 val_298 val_298 +298 val_298 val_298 +298 val_298 val_298 +199 val_199 val_199 +199 val_199 val_199 +199 val_199 val_199 +191 val_191 val_191 +191 val_191 val_191 +96 val_96 val_96 +481 val_481 val_481 +409 val_409 val_409 +409 val_409 val_409 +409 val_409 val_409 +470 val_470 val_470 +137 val_137 val_137 +137 val_137 val_137 +85 val_85 val_85 +364 val_364 val_364 +306 val_306 val_306 +272 val_272 val_272 +272 val_272 val_272 +331 val_331 val_331 +331 val_331 val_331 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +452 val_452 val_452 +177 val_177 val_177 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +317 val_317 val_317 +317 val_317 val_317 +34 val_34 val_34 +229 val_229 val_229 +229 val_229 
val_229 +195 val_195 val_195 +195 val_195 val_195 +430 val_430 val_430 +430 val_430 val_430 +430 val_430 val_430 +119 val_119 val_119 +119 val_119 val_119 +119 val_119 val_119 +489 val_489 val_489 +489 val_489 val_489 +489 val_489 val_489 +489 val_489 val_489 +78 val_78 val_78 +41 val_41 val_41 +492 val_492 val_492 +492 val_492 val_492 +449 val_449 val_449 +218 val_218 val_218 +30 val_30 val_30 +74 val_74 val_74 +342 val_342 val_342 +342 val_342 val_342 +368 val_368 val_368 +485 val_485 val_485 +70 val_70 val_70 +70 val_70 val_70 +70 val_70 val_70 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +191 val_191 val_191 +191 val_191 val_191 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +438 val_438 val_438 +438 val_438 val_438 +438 val_438 val_438 +467 val_467 val_467 +229 val_229 val_229 +229 val_229 val_229 +463 val_463 val_463 +463 val_463 val_463 +283 val_283 val_283 +331 val_331 val_331 +331 val_331 val_331 +335 val_335 val_335 +104 val_104 val_104 +104 val_104 val_104 +409 val_409 val_409 +409 val_409 val_409 +409 val_409 val_409 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +258 val_258 val_258 +203 val_203 val_203 +203 val_203 val_203 +12 val_12 val_12 +12 val_12 val_12 +478 val_478 val_478 +478 val_478 val_478 +298 val_298 val_298 +298 val_298 val_298 +298 val_298 val_298 +382 val_382 val_382 +382 val_382 val_382 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +70 val_70 val_70 +70 val_70 val_70 +70 val_70 val_70 +397 val_397 val_397 +397 val_397 val_397 +104 val_104 val_104 +104 val_104 val_104 +70 val_70 val_70 +70 val_70 val_70 +70 val_70 val_70 +438 val_438 val_438 +438 val_438 val_438 +438 val_438 val_438 +119 val_119 val_119 +119 val_119 val_119 +119 val_119 val_119 +360 val_360 val_360 +199 val_199 val_199 +199 val_199 val_199 +199 val_199 val_199 +478 val_478 val_478 +478 val_478 val_478 +317 val_317 val_317 +317 val_317 val_317 +207 val_207 val_207 +207 val_207 val_207 +265 val_265 val_265 +265 val_265 val_265 +353 val_353 val_353 +353 val_353 val_353 +214 val_214 val_214 +133 val_133 val_133 +375 val_375 val_375 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +67 val_67 val_67 +67 val_67 val_67 +379 val_379 val_379 +492 val_492 val_492 +492 val_492 val_492 +100 val_100 val_100 +100 val_100 val_100 +298 val_298 val_298 +298 val_298 val_298 +298 val_298 val_298 +9 val_9 val_9 +126 val_126 val_126 +165 val_165 val_165 +165 val_165 val_165 +484 val_484 val_484 +150 val_150 val_150 +224 val_224 val_224 +224 val_224 val_224 +66 val_66 val_66 +213 val_213 val_213 +213 val_213 val_213 +374 val_374 val_374 +495 val_495 val_495 +37 val_37 val_37 +37 val_37 val_37 +327 val_327 val_327 +327 val_327 val_327 +327 val_327 val_327 +15 val_15 val_15 +15 val_15 val_15 +338 val_338 val_338 +459 val_459 val_459 +459 val_459 val_459 +466 val_466 val_466 +466 val_466 val_466 +466 val_466 val_466 +396 val_396 val_396 +396 val_396 val_396 +396 val_396 val_396 +309 val_309 val_309 +309 val_309 val_309 +367 val_367 val_367 +367 val_367 val_367 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +455 val_455 val_455 +316 val_316 val_316 +316 val_316 val_316 +316 val_316 val_316 +345 val_345 val_345 +129 val_129 val_129 +129 val_129 val_129 +378 val_378 val_378 +4 val_4 val_4 +356 val_356 val_356 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +125 val_125 val_125 +125 val_125 val_125 +437 val_437 val_437 
+286 val_286 val_286 +187 val_187 val_187 +187 val_187 val_187 +187 val_187 val_187 +176 val_176 val_176 +176 val_176 val_176 +459 val_459 val_459 +459 val_459 val_459 +51 val_51 val_51 +51 val_51 val_51 +103 val_103 val_103 +103 val_103 val_103 +239 val_239 val_239 +239 val_239 val_239 +213 val_213 val_213 +213 val_213 val_213 +176 val_176 val_176 +176 val_176 val_176 +275 val_275 val_275 +260 val_260 val_260 +404 val_404 val_404 +404 val_404 val_404 +217 val_217 val_217 +217 val_217 val_217 +84 val_84 val_84 +84 val_84 val_84 +466 val_466 val_466 +466 val_466 val_466 +466 val_466 val_466 +8 val_8 val_8 +411 val_411 val_411 +172 val_172 val_172 +172 val_172 val_172 +129 val_129 val_129 +129 val_129 val_129 +158 val_158 val_158 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +26 val_26 val_26 +26 val_26 val_26 +165 val_165 val_165 +165 val_165 val_165 +327 val_327 val_327 +327 val_327 val_327 +327 val_327 val_327 +51 val_51 val_51 +51 val_51 val_51 +404 val_404 val_404 +404 val_404 val_404 +95 val_95 val_95 +95 val_95 val_95 +282 val_282 val_282 +282 val_282 val_282 +187 val_187 val_187 +187 val_187 val_187 +187 val_187 val_187 +316 val_316 val_316 +316 val_316 val_316 +316 val_316 val_316 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +77 val_77 val_77 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +118 val_118 val_118 +118 val_118 val_118 +282 val_282 val_282 +282 val_282 val_282 +419 val_419 val_419 +15 val_15 val_15 +15 val_15 val_15 +118 val_118 val_118 +118 val_118 val_118 +19 val_19 val_19 +224 val_224 val_224 +224 val_224 val_224 +309 val_309 val_309 +309 val_309 val_309 +389 val_389 val_389 +327 val_327 val_327 +327 val_327 val_327 +327 val_327 val_327 +242 val_242 val_242 +242 val_242 val_242 +392 val_392 val_392 +242 val_242 val_242 +242 val_242 val_242 +396 val_396 val_396 +396 val_396 val_396 +396 val_396 val_396 +95 val_95 val_95 +95 val_95 val_95 +11 val_11 val_11 +143 val_143 val_143 +228 val_228 val_228 +33 val_33 val_33 +103 val_103 val_103 +103 val_103 val_103 +367 val_367 val_367 +367 val_367 val_367 +239 val_239 val_239 +239 val_239 val_239 +480 val_480 val_480 +480 val_480 val_480 +480 val_480 val_480 +202 val_202 val_202 +316 val_316 val_316 +316 val_316 val_316 +316 val_316 val_316 +235 val_235 val_235 +80 val_80 val_80 +44 val_44 val_44 +466 val_466 val_466 +466 val_466 val_466 +466 val_466 val_466 +257 val_257 val_257 +190 val_190 val_190 +114 val_114 val_114 +396 val_396 val_396 +396 val_396 val_396 +396 val_396 val_396 +217 val_217 val_217 +217 val_217 val_217 +125 val_125 val_125 +125 val_125 val_125 +187 val_187 val_187 +187 val_187 val_187 +187 val_187 val_187 +480 val_480 val_480 +480 val_480 val_480 +480 val_480 val_480 +491 val_491 val_491 +305 val_305 val_305 +444 val_444 val_444 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +323 val_323 val_323 +480 val_480 val_480 +480 val_480 val_480 +480 val_480 val_480 +136 val_136 val_136 +172 val_172 val_172 +172 val_172 val_172 +462 val_462 val_462 +462 val_462 val_462 +26 val_26 val_26 +26 val_26 val_26 +462 val_462 val_462 +462 val_462 val_462 +341 val_341 val_341 +183 val_183 val_183 +84 val_84 val_84 +84 val_84 val_84 +37 val_37 val_37 +37 val_37 val_37 +448 val_448 val_448 +194 val_194 val_194 +477 val_477 val_477 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +400 val_400 val_400 +27 val_27 val_27 +409 val_409 val_409 +409 val_409 val_409 +409 val_409 val_409 +265 val_265 val_265 +265 val_265 val_265 
+401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +166 val_166 val_166 +430 val_430 val_430 +430 val_430 val_430 +430 val_430 val_430 +287 val_287 val_287 +207 val_207 val_207 +207 val_207 val_207 +199 val_199 val_199 +199 val_199 val_199 +199 val_199 val_199 +247 val_247 val_247 +489 val_489 val_489 +489 val_489 val_489 +489 val_489 val_489 +489 val_489 val_489 +162 val_162 val_162 +397 val_397 val_397 +397 val_397 val_397 +342 val_342 val_342 +342 val_342 val_342 +195 val_195 val_195 +195 val_195 val_195 +155 val_155 val_155 +203 val_203 val_203 +203 val_203 val_203 +339 val_339 val_339 +302 val_302 val_302 +438 val_438 val_438 +438 val_438 val_438 +438 val_438 val_438 +489 val_489 val_489 +489 val_489 val_489 +489 val_489 val_489 +489 val_489 val_489 +221 val_221 val_221 +221 val_221 val_221 +92 val_92 val_92 +111 val_111 val_111 +427 val_427 val_427 +382 val_382 val_382 +382 val_382 val_382 +386 val_386 val_386 +430 val_430 val_430 +430 val_430 val_430 +430 val_430 val_430 +221 val_221 val_221 +221 val_221 val_221 +137 val_137 val_137 +137 val_137 val_137 +180 val_180 val_180 +12 val_12 val_12 +12 val_12 val_12 +67 val_67 val_67 +67 val_67 val_67 +489 val_489 val_489 +489 val_489 val_489 +489 val_489 val_489 +489 val_489 val_489 +353 val_353 val_353 +353 val_353 val_353 +272 val_272 val_272 +272 val_272 val_272 +463 val_463 val_463 +463 val_463 val_463 +119 val_119 val_119 +119 val_119 val_119 +119 val_119 val_119 +496 val_496 val_496 +393 val_393 val_393 +100 val_100 val_100 +100 val_100 val_100 +298 val_298 val_298 +298 val_298 val_298 +298 val_298 val_298 +199 val_199 val_199 +199 val_199 val_199 +199 val_199 val_199 +191 val_191 val_191 +191 val_191 val_191 +96 val_96 val_96 +481 val_481 val_481 +409 val_409 val_409 +409 val_409 val_409 +409 val_409 val_409 +470 val_470 val_470 +137 val_137 val_137 +137 val_137 val_137 +85 val_85 val_85 +364 val_364 val_364 +306 val_306 val_306 +272 val_272 val_272 +272 val_272 val_272 +331 val_331 val_331 +331 val_331 val_331 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +452 val_452 val_452 +177 val_177 val_177 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +317 val_317 val_317 +317 val_317 val_317 +34 val_34 val_34 +229 val_229 val_229 +229 val_229 val_229 +195 val_195 val_195 +195 val_195 val_195 +430 val_430 val_430 +430 val_430 val_430 +430 val_430 val_430 +119 val_119 val_119 +119 val_119 val_119 +119 val_119 val_119 +489 val_489 val_489 +489 val_489 val_489 +489 val_489 val_489 +489 val_489 val_489 +78 val_78 val_78 +41 val_41 val_41 +492 val_492 val_492 +492 val_492 val_492 +449 val_449 val_449 +218 val_218 val_218 +30 val_30 val_30 +74 val_74 val_74 +342 val_342 val_342 +342 val_342 val_342 +368 val_368 val_368 +485 val_485 val_485 +70 val_70 val_70 +70 val_70 val_70 +70 val_70 val_70 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +191 val_191 val_191 +191 val_191 val_191 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +438 val_438 val_438 +438 val_438 val_438 +438 val_438 val_438 +467 val_467 val_467 +229 val_229 val_229 +229 val_229 val_229 +463 val_463 val_463 +463 val_463 val_463 +283 val_283 val_283 +331 val_331 val_331 +331 val_331 val_331 +335 val_335 val_335 +104 val_104 val_104 +104 val_104 val_104 +409 val_409 val_409 +409 val_409 val_409 +409 val_409 val_409 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 
+258 val_258 val_258 +203 val_203 val_203 +203 val_203 val_203 +12 val_12 val_12 +12 val_12 val_12 +478 val_478 val_478 +478 val_478 val_478 +298 val_298 val_298 +298 val_298 val_298 +298 val_298 val_298 +382 val_382 val_382 +382 val_382 val_382 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +70 val_70 val_70 +70 val_70 val_70 +70 val_70 val_70 +397 val_397 val_397 +397 val_397 val_397 +104 val_104 val_104 +104 val_104 val_104 +70 val_70 val_70 +70 val_70 val_70 +70 val_70 val_70 +438 val_438 val_438 +438 val_438 val_438 +438 val_438 val_438 +119 val_119 val_119 +119 val_119 val_119 +119 val_119 val_119 +360 val_360 val_360 +199 val_199 val_199 +199 val_199 val_199 +199 val_199 val_199 +478 val_478 val_478 +478 val_478 val_478 +317 val_317 val_317 +317 val_317 val_317 +207 val_207 val_207 +207 val_207 val_207 +265 val_265 val_265 +265 val_265 val_265 +353 val_353 val_353 +353 val_353 val_353 +214 val_214 val_214 +133 val_133 val_133 +375 val_375 val_375 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +67 val_67 val_67 +67 val_67 val_67 +379 val_379 val_379 +492 val_492 val_492 +492 val_492 val_492 +100 val_100 val_100 +100 val_100 val_100 +298 val_298 val_298 +298 val_298 val_298 +298 val_298 val_298 +9 val_9 val_9 +126 val_126 val_126 +PREHOOK: query: explain extended +select count(1) from ( +select /*+mapjoin(a)*/ a.key as key, a.value as v1, b.value as v2 +from +(select * from srcbucket_mapjoin where key<100) a +join srcbucket_mapjoin_part b +on a.key=b.key) subq +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select count(1) from ( +select /*+mapjoin(a)*/ a.key as key, a.value as v1, b.value as v2 +from +(select * from srcbucket_mapjoin where key<100) a +join srcbucket_mapjoin_part b +on a.key=b.key) subq +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF srcbucket_mapjoin)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 100)))) a) (TOK_TABREF srcbucket_mapjoin_part b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) v1) (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL b) value) v2)))) subq)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        subq:b 
+          TableScan
+            alias: b
+            Common Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 
+                1 
+              handleSkewJoin: false
+              keys:
+                0 [Column[_col0]]
+                1 [Column[key]]
+              Position of Big Table: 1
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+                directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-11_11-58-18_326_2716597204475394437/10002
+                NumFilesPerFileSink: 1
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    properties:
+                      columns 
+                      columns.types 
+                      escape.delim \
+                TotalFiles: 1
+                MultiFileSpray: false
+      Local Work:
+        Map Reduce Local Work
+          Alias -> Map Local Tables:
+            subq:a:srcbucket_mapjoin 
+              Fetch Operator
+                limit: -1
+          Alias -> Map Local Operator Tree:
+            subq:a:srcbucket_mapjoin 
+              TableScan
+                alias: srcbucket_mapjoin
+                Filter Operator
+                  isSamplingPred: false
+                  predicate:
+                      expr: (key < 100)
+                      type: boolean
+                  Filter Operator
+                    isSamplingPred: false
+                    predicate:
+                        expr: (key < 100)
+                        type: boolean
+                    Select Operator
+                      expressions:
+                            expr: key
+                            type: int
+                      outputColumnNames: _col0
+                      Common Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        condition expressions:
+                          0 
+                          1 
+                        handleSkewJoin: false
+                        keys:
+                          0 [Column[_col0]]
+                          1 [Column[key]]
+                        Position of Big Table: 1
+                        File Output Operator
+                          compressed: false
+                          GlobalTableId: 0
+                          directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-11_11-58-18_326_2716597204475394437/10002
+                          NumFilesPerFileSink: 1
+                          table:
+                              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                              properties:
+                                columns 
+                                columns.types 
+                                escape.delim \
+                          TotalFiles: 1
+                          MultiFileSpray: false
+          Bucket Mapjoin Context:
+              Alias Bucket Base File Name Mapping:
+                subq:a:srcbucket_mapjoin {srcbucket20.txt=[srcbucket20.txt], srcbucket21.txt=[srcbucket21.txt], srcbucket22.txt=[srcbucket20.txt], srcbucket23.txt=[srcbucket21.txt], ds=2008-04-09/srcbucket20.txt=[srcbucket20.txt], ds=2008-04-09/srcbucket21.txt=[srcbucket21.txt], ds=2008-04-09/srcbucket22.txt=[srcbucket20.txt], ds=2008-04-09/srcbucket23.txt=[srcbucket21.txt]}
+              Alias Bucket File Name Mapping:
+                subq:a:srcbucket_mapjoin {file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket20.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket21.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket22.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket23.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09/srcbucket20.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09/srcbucket21.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09/srcbucket22.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09/srcbucket23.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt]}
+      Needs Tagging: false
+      Path -> Alias:
+        file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08 [subq:b]
+        file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09 [subq:b]
+      Path -> Partition:
+        file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08 
+          Partition
+            base file name: ds=2008-04-08
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2008-04-08
+            properties:
+              bucket_count 4
+              bucket_field_name key
+              columns key,value
+              columns.types int:string
+              file.inputformat org.apache.hadoop.mapred.TextInputFormat
+              file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part
+              name srcbucket_mapjoin_part
+              partition_columns ds
+              serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              transient_lastDdlTime 1268337453
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count 4
+                bucket_field_name key
+                columns key,value
+                columns.types int:string
+                file.inputformat org.apache.hadoop.mapred.TextInputFormat
+                file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part
+                name srcbucket_mapjoin_part
+                partition_columns ds
+                serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                transient_lastDdlTime 1268337453
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: srcbucket_mapjoin_part
+            name: srcbucket_mapjoin_part
+        file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09 
+          Partition
+            base file name: ds=2008-04-09
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2008-04-09
+            properties:
+              bucket_count 4
+              bucket_field_name key
+              columns key,value
+              columns.types int:string
+              file.inputformat org.apache.hadoop.mapred.TextInputFormat
+              file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part
+              name srcbucket_mapjoin_part
+              partition_columns ds
+              serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              transient_lastDdlTime 1268337453
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count 4
+                bucket_field_name key
+                columns key,value
+                columns.types int:string
+                file.inputformat org.apache.hadoop.mapred.TextInputFormat
+                file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part
+                name srcbucket_mapjoin_part
+                partition_columns ds
+                serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                transient_lastDdlTime 1268337453
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: srcbucket_mapjoin_part
+            name: srcbucket_mapjoin_part
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+        file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-11_11-58-18_326_2716597204475394437/10002 
+          Select Operator
+            Select Operator
+              Select Operator
+                Group By Operator
+                  aggregations:
+                        expr: count(1)
+                  bucketGroup: false
+                  mode: hash
+                  outputColumnNames: _col0
+                  Reduce Output Operator
+                    sort order: 
+                    tag: -1
+                    value expressions:
+                          expr: _col0
+                          type: bigint
+      Needs Tagging: false
+      Path -> Alias:
+        file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-11_11-58-18_326_2716597204475394437/10002 [file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-11_11-58-18_326_2716597204475394437/10002]
+      Path -> Partition:
+        file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-11_11-58-18_326_2716597204475394437/10002 
+          Partition
+            base file name: 10002
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              columns 
+              columns.types 
+              escape.delim \
+
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                columns 
+                columns.types 
+                escape.delim \
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+          bucketGroup: false
+          mode: mergepartial
+          outputColumnNames: _col0
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: bigint
+            outputColumnNames: _col0
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-11_11-58-18_326_2716597204475394437/10001
+              NumFilesPerFileSink: 1
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    columns _col0
+                    columns.types bigint
+                    serialization.format 1
+              TotalFiles: 1
+              MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
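[Note] In this two-stage shape, Stage-1 runs the bucketed map join and writes the joined rows to the intermediate 10002 sink, and Stage-2 counts them with a hash-mode map-side aggregation followed by a mergepartial reduce. A hand-run sanity check (illustrative only, not part of the golden file): the count must not depend on the optimization, since bucket map join only changes which small-table files a mapper loads, never the join semantics.

    -- Expect the same count (156, per the run below) with the
    -- optimization toggled either way.
    set hive.optimize.bucketmapjoin = false;
    select count(1) from (
      select /*+mapjoin(b)*/ a.key as key, a.value as v1, b.value as v2
      from (select * from srcbucket_mapjoin where key<100) a
      join srcbucket_mapjoin_part b on a.key=b.key) subq;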
file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-11_11-58-18_326_2716597204475394437/10001 + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + serialization.format 1 + TotalFiles: 1 + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select count(1) from +( +select /*+mapjoin(b)*/ a.key as key, a.value as v1, b.value as v2 +from +(select * from srcbucket_mapjoin where key<100) a +join srcbucket_mapjoin_part b +on a.key=b.key) subq +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-09 +PREHOOK: Input: default@srcbucket_mapjoin +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-11_11-58-18_896_187376100343148175/10000 +POSTHOOK: query: select count(1) from +( +select /*+mapjoin(b)*/ a.key as key, a.value as v1, b.value as v2 +from +(select * from srcbucket_mapjoin where key<100) a +join srcbucket_mapjoin_part b +on a.key=b.key) subq +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-09 +POSTHOOK: Input: default@srcbucket_mapjoin +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-11_11-58-18_896_187376100343148175/10000 +156 +PREHOOK: query: explain extended +select count(1) from ( +select /*+mapjoin(a, c)*/ a.key as key, a.value as v1, b.value as v2 +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key join srcbucket_mapjoin_part c on b.key = c.key ) subq +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select count(1) from ( +select /*+mapjoin(a, c)*/ a.key as key, a.value as v1, b.value as v2 +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key join srcbucket_mapjoin_part c on b.key = c.key ) subq +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF srcbucket_mapjoin a) (TOK_TABREF srcbucket_mapjoin_part b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF srcbucket_mapjoin_part c) (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL c) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a c))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) v1) (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL b) value) v2)))) subq)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + subq:b + TableScan + alias: b + Common Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 + 1 + 2 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + 2 [Column[key]] + Position of Big Table: 1 + File Output Operator + compressed: false + GlobalTableId: 0 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-11_11-58-38_982_821040782184594547/10002 + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns + columns.types + escape.delim \ + TotalFiles: 1 + MultiFileSpray: false + Local Work: + Map Reduce Local Work + Alias -> Map Local Tables: + subq:a + Fetch Operator + limit: -1 + subq:c + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + subq:a + TableScan + alias: a + Common Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 + 1 + 2 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + 2 [Column[key]] + Position of Big Table: 1 + File Output Operator + compressed: false + GlobalTableId: 0 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-11_11-58-38_982_821040782184594547/10002 + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns + columns.types + escape.delim \ + TotalFiles: 1 + MultiFileSpray: false + subq:c + TableScan + alias: c + Common Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 + 1 + 2 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + 2 [Column[key]] + Position of Big Table: 1 + File Output Operator + compressed: false + GlobalTableId: 0 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-11_11-58-38_982_821040782184594547/10002 + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns + columns.types + escape.delim \ + TotalFiles: 1 + MultiFileSpray: false + Needs Tagging: false + Path -> Alias: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08 [subq:b] + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09 [subq:b] + Path -> Partition: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08 + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.types int:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + 
file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part + name srcbucket_mapjoin_part + partition_columns ds + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1268337453 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.types int:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part + name srcbucket_mapjoin_part + partition_columns ds + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1268337453 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: srcbucket_mapjoin_part + name: srcbucket_mapjoin_part + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09 + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.types int:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part + name srcbucket_mapjoin_part + partition_columns ds + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1268337453 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.types int:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part + name srcbucket_mapjoin_part + partition_columns ds + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1268337453 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: srcbucket_mapjoin_part + name: srcbucket_mapjoin_part + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-11_11-58-38_982_821040782184594547/10002 + Select Operator + Select Operator + Select Operator + Group By Operator + aggregations: 
+ expr: count(1) + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint + Needs Tagging: false + Path -> Alias: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-11_11-58-38_982_821040782184594547/10002 [file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-11_11-58-38_982_821040782184594547/10002] + Path -> Partition: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-11_11-58-38_982_821040782184594547/10002 + Partition + base file name: 10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns + columns.types + escape.delim \ + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns + columns.types + escape.delim \ + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: bigint + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-11_11-58-38_982_821040782184594547/10001 + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + serialization.format 1 + TotalFiles: 1 + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select count(1) from ( +select /*+mapjoin(a, c)*/ a.key as key, a.value as v1, b.value as v2 +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key join srcbucket_mapjoin_part c on b.key = c.key ) subq +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-09 +PREHOOK: Input: default@srcbucket_mapjoin +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-11_11-58-39_775_7382526982231902090/10000 +POSTHOOK: query: select count(1) from ( +select /*+mapjoin(a, c)*/ a.key as key, a.value as v1, b.value as v2 +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key join srcbucket_mapjoin_part c on b.key = c.key ) subq +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-09 +POSTHOOK: Input: default@srcbucket_mapjoin +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-11_11-58-39_775_7382526982231902090/10000 +4456 +PREHOOK: query: explain extended +select count(1) from ( +select /*+mapjoin(a, c)*/ a.key as key, a.value as v1, b.value as v2 +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key join srcbucket_mapjoin_part c on b.key = c.key where a.key<100 and b.key<100) subq +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select count(1) from ( +select /*+mapjoin(a, c)*/ a.key as key, a.value as v1, b.value as v2 +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key join srcbucket_mapjoin_part c on b.key = c.key 
where a.key<100 and b.key<100) subq +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF srcbucket_mapjoin a) (TOK_TABREF srcbucket_mapjoin_part b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF srcbucket_mapjoin_part c) (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL c) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a c))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) v1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) v2)) (TOK_WHERE (and (< (. (TOK_TABLE_OR_COL a) key) 100) (< (. (TOK_TABLE_OR_COL b) key) 100))))) subq)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + subq:b + TableScan + alias: b + Filter Operator + isSamplingPred: false + predicate: + expr: (key < 100) + type: boolean + Common Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {key} + 1 {key} + 2 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + 2 [Column[key]] + outputColumnNames: _col0, _col2 + Position of Big Table: 1 + File Output Operator + compressed: false + GlobalTableId: 0 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-11_11-58-53_555_7227321254338602823/10002 + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col2 + columns.types int,int + escape.delim \ + TotalFiles: 1 + MultiFileSpray: false + Local Work: + Map Reduce Local Work + Alias -> Map Local Tables: + subq:a + Fetch Operator + limit: -1 + subq:c + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + subq:a + TableScan + alias: a + Filter Operator + isSamplingPred: false + predicate: + expr: (key < 100) + type: boolean + Common Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {key} + 1 {key} + 2 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + 2 [Column[key]] + outputColumnNames: _col0, _col2 + Position of Big Table: 1 + File Output Operator + compressed: false + GlobalTableId: 0 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-11_11-58-53_555_7227321254338602823/10002 + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col2 + columns.types int,int + escape.delim \ + TotalFiles: 1 + MultiFileSpray: false + subq:c + TableScan + alias: c + Common Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {key} + 1 {key} + 2 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + 2 [Column[key]] + outputColumnNames: _col0, _col2 + Position of Big Table: 1 + File Output Operator + compressed: false + GlobalTableId: 0 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-11_11-58-53_555_7227321254338602823/10002 + 
NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col2 + columns.types int,int + escape.delim \ + TotalFiles: 1 + MultiFileSpray: false + Needs Tagging: false + Path -> Alias: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08 [subq:b] + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09 [subq:b] + Path -> Partition: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08 + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.types int:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part + name srcbucket_mapjoin_part + partition_columns ds + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1268337453 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.types int:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part + name srcbucket_mapjoin_part + partition_columns ds + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1268337453 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: srcbucket_mapjoin_part + name: srcbucket_mapjoin_part + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09 + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.types int:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part + name srcbucket_mapjoin_part + partition_columns ds + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1268337453 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.types int:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part + name srcbucket_mapjoin_part + partition_columns ds + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1268337453 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: srcbucket_mapjoin_part + name: srcbucket_mapjoin_part + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-11_11-58-53_555_7227321254338602823/10002 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col2 + type: int + outputColumnNames: _col0, _col2 + Filter Operator + isSamplingPred: false + predicate: + expr: ((_col0 < 100) and (_col2 < 100)) + type: boolean + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count(1) + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint + Needs Tagging: false + Path -> Alias: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-11_11-58-53_555_7227321254338602823/10002 [file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-11_11-58-53_555_7227321254338602823/10002] + Path -> Partition: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-11_11-58-53_555_7227321254338602823/10002 + Partition + base file name: 10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col2 + columns.types int,int + escape.delim \ + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col2 + columns.types int,int + escape.delim \ + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: bigint + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-11_11-58-53_555_7227321254338602823/10001 + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + serialization.format 1 + TotalFiles: 1 + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select count(1) from ( +select /*+mapjoin(a, c)*/ a.key as key, a.value as v1, b.value as v2 +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key join srcbucket_mapjoin_part c on b.key = c.key where a.key<100 and b.key<100) subq +PREHOOK: type: QUERY +PREHOOK: Input: 
default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-09 +PREHOOK: Input: default@srcbucket_mapjoin +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-11_11-58-54_235_3922852826658694835/10000 +POSTHOOK: query: select count(1) from ( +select /*+mapjoin(a, c)*/ a.key as key, a.value as v1, b.value as v2 +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key join srcbucket_mapjoin_part c on b.key = c.key where a.key<100 and b.key<100) subq +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-09 +POSTHOOK: Input: default@srcbucket_mapjoin +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-11_11-58-54_235_3922852826658694835/10000 +656 +PREHOOK: query: drop table srcbucket_mapjoin +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table srcbucket_mapjoin +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: drop table srcbucket_mapjoin_part +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table srcbucket_mapjoin_part +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: default@srcbucket_mapjoin_part +PREHOOK: query: drop table srcbucket_mapjoin_part_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table srcbucket_mapjoin_part_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 Index: ql/src/test/results/clientpositive/join28.q.out =================================================================== --- ql/src/test/results/clientpositive/join28.q.out (revision 919258) +++ ql/src/test/results/clientpositive/join28.q.out (working copy) @@ -28,11 +28,10 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-5 depends on stages: Stage-2 , consists of Stage-4, Stage-3 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3 + Stage-4 depends on stages: Stage-1 , consists of Stage-3, Stage-2 Stage-3 + Stage-0 depends on stages: Stage-3, Stage-2 + Stage-2 STAGE PLANS: Stage: Stage-1 @@ -53,18 +52,59 @@ 1 [Column[key]] outputColumnNames: _col0 Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[key]] + outputColumnNames: _col0, _col5 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col5 + type: string + outputColumnNames: _col0, _col5 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col5 + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: dest_j1 Local Work: Map Reduce Local Work Alias -> Map Local Tables: subq:x Fetch Operator limit: -1 + z + Fetch Operator + limit: -1 Alias -> Map Local 
Operator Tree: subq:x TableScan @@ -81,68 +121,50 @@ 1 [Column[key]] outputColumnNames: _col0 Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: - file:/data/users/heyongqiang/hive-trunk/.ptest_0/build/ql/tmp/296504271/10002 - Select Operator - expressions: - expr: _col0 - type: string - outputColumnNames: _col0 - Select Operator - expressions: - expr: _col0 - type: string - outputColumnNames: _col0 - Common Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} - 1 {value} - handleSkewJoin: false - keys: - 0 [Column[_col0]] - 1 [Column[key]] - outputColumnNames: _col0, _col5 - Position of Big Table: 0 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col5 Select Operator expressions: expr: _col0 type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: dest_j1 - Local Work: - Map Reduce Local Work - Alias -> Map Local Tables: - z - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[key]] + outputColumnNames: _col0, _col5 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col5 + type: string + outputColumnNames: _col0, _col5 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col5 + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: dest_j1 z TableScan alias: z @@ -193,14 +215,14 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: dest_j1 - Stage: Stage-5 + Stage: Stage-4 Conditional Operator - Stage: Stage-4 + Stage: Stage-3 Move Operator files: hdfs directory: true - destination: file:/data/users/heyongqiang/hive-trunk/.ptest_0/build/ql/tmp/562095980/10000 + destination: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-10_17-46-39_322_4155009261993353073/10000 Stage: Stage-0 Move Operator @@ -212,10 +234,10 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: dest_j1 - Stage: Stage-3 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: - file:/data/users/heyongqiang/hive-trunk/.ptest_0/build/ql/tmp/296504271/10003 + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-10_17-46-39_322_4155009261993353073/10002 Reduce Output Operator sort order: Map-reduce partition columns: @@ -264,11 +286,11 @@ PREHOOK: query: select * from dest_j1 x order by x.key PREHOOK: type: QUERY PREHOOK: Input: default@dest_j1 -PREHOOK: Output: 
file:/data/users/heyongqiang/hive-trunk/.ptest_0/build/ql/tmp/1591031497/10000 +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-10_17-46-46_085_5065097424609393219/10000 POSTHOOK: query: select * from dest_j1 x order by x.key POSTHOOK: type: QUERY POSTHOOK: Input: default@dest_j1 -POSTHOOK: Output: file:/data/users/heyongqiang/hive-trunk/.ptest_0/build/ql/tmp/1591031497/10000 +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-10_17-46-46_085_5065097424609393219/10000 128 val_128 128 val_128 128 val_128 Index: ql/src/test/results/clientpositive/join32.q.out =================================================================== --- ql/src/test/results/clientpositive/join32.q.out (revision 919258) +++ ql/src/test/results/clientpositive/join32.q.out (working copy) @@ -19,15 +19,16 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF src1 x) (TOK_TABREF src y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key))) (TOK_TABREF srcpart z) (and (and (= (. (TOK_TABLE_OR_COL x) value) (. (TOK_TABLE_OR_COL z) value)) (= (. (TOK_TABLE_OR_COL z) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL z) hr) 11)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB dest_j1)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST x z))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) value))))) STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-1 depends on stages: Stage-5 - Stage-4 depends on stages: Stage-1 , consists of Stage-3, Stage-2 + Stage-6 is a root stage + Stage-1 depends on stages: Stage-6 + Stage-2 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 , consists of Stage-4, Stage-3 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3 Stage-3 - Stage-0 depends on stages: Stage-3, Stage-2 - Stage-2 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Alias -> Map Operator Tree: y @@ -48,7 +49,7 @@ File Output Operator compressed: false GlobalTableId: 0 - directory: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_16-01-40_907_1655518346126745740/10003 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-10_17-39-31_582_9166944613001622799/10004 NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -84,7 +85,7 @@ File Output Operator compressed: false GlobalTableId: 0 - directory: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_16-01-40_907_1655518346126745740/10003 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-10_17-39-31_582_9166944613001622799/10004 NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -97,9 +98,9 @@ MultiFileSpray: false Needs Tagging: false Path -> Alias: - file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/src [y] + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/src [y] Path -> Partition: - file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/src + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/src Partition base file name: src input format: org.apache.hadoop.mapred.TextInputFormat @@ -110,12 +111,12 @@ columns.types string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/src + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/src name src serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266451300 + transient_lastDdlTime 1268271570 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -126,12 +127,12 @@ columns.types string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/src + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/src name src serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266451300 + transient_lastDdlTime 1268271570 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: src name: src @@ -139,7 +140,7 @@ Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: - file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_16-01-40_907_1655518346126745740/10003 + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-10_17-39-31_582_9166944613001622799/10004 Select Operator expressions: expr: _col0 @@ -161,48 +162,20 @@ 1 [Column[value]] outputColumnNames: _col1, _col2, _col5 Position of Big Table: 0 - Select Operator - expressions: - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col5 - type: string - outputColumnNames: _col1, _col2, _col5 - Select Operator - expressions: - expr: _col2 - type: string - expr: _col5 - type: string - expr: _col1 - type: string - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - GlobalTableId: 1 - directory: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_16-01-40_907_1655518346126745740/10002 - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.types string:string:string - file.inputformat org.apache.hadoop.mapred.TextInputFormat - file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/dest_j1 - name dest_j1 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266451300 - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: dest_j1 - TotalFiles: 1 - MultiFileSpray: false + File Output Operator + compressed: false + GlobalTableId: 0 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-10_17-39-31_582_9166944613001622799/10002 + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col1,_col2,_col5 + columns.types string,string,string + escape.delim \ + TotalFiles: 1 + MultiFileSpray: false Local 
Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -240,55 +213,27 @@ 1 [Column[value]] outputColumnNames: _col1, _col2, _col5 Position of Big Table: 0 - Select Operator - expressions: - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col5 - type: string - outputColumnNames: _col1, _col2, _col5 - Select Operator - expressions: - expr: _col2 - type: string - expr: _col5 - type: string - expr: _col1 - type: string - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - GlobalTableId: 1 - directory: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_16-01-40_907_1655518346126745740/10002 - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.types string:string:string - file.inputformat org.apache.hadoop.mapred.TextInputFormat - file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/dest_j1 - name dest_j1 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266451300 - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: dest_j1 - TotalFiles: 1 - MultiFileSpray: false + File Output Operator + compressed: false + GlobalTableId: 0 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-10_17-39-31_582_9166944613001622799/10002 + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col1,_col2,_col5 + columns.types string,string,string + escape.delim \ + TotalFiles: 1 + MultiFileSpray: false Needs Tagging: false Path -> Alias: - file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_16-01-40_907_1655518346126745740/10003 [file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_16-01-40_907_1655518346126745740/10003] + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-10_17-39-31_582_9166944613001622799/10004 [file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-10_17-39-31_582_9166944613001622799/10004] Path -> Partition: - file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_16-01-40_907_1655518346126745740/10003 + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-10_17-39-31_582_9166944613001622799/10004 Partition - base file name: 10003 + base file name: 10004 input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: @@ -303,21 +248,88 @@ columns.types string,string,string escape.delim \ - Stage: Stage-4 + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-10_17-39-31_582_9166944613001622799/10002 + Select Operator + expressions: + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col5 + type: string + outputColumnNames: _col1, _col2, _col5 + Select Operator + expressions: + expr: _col2 + type: string + expr: _col5 + type: string + expr: _col1 + type: string + 
outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-10_17-39-31_582_9166944613001622799/10003 + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value,val2 + columns.types string:string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/dest_j1 + name dest_j1 + serialization.ddl struct dest_j1 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1268271571 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: dest_j1 + TotalFiles: 1 + MultiFileSpray: false + Needs Tagging: false + Path -> Alias: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-10_17-39-31_582_9166944613001622799/10002 [file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-10_17-39-31_582_9166944613001622799/10002] + Path -> Partition: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-10_17-39-31_582_9166944613001622799/10002 + Partition + base file name: 10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col1,_col2,_col5 + columns.types string,string,string + escape.delim \ + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col1,_col2,_col5 + columns.types string,string,string + escape.delim \ + + Stage: Stage-5 Conditional Operator - Stage: Stage-3 + Stage: Stage-4 Move Operator files: hdfs directory: true - source: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_16-01-40_907_1655518346126745740/10002 - destination: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_16-01-40_907_1655518346126745740/10000 + source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-10_17-39-31_582_9166944613001622799/10003 + destination: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-10_17-39-31_582_9166944613001622799/10000 Stage: Stage-0 Move Operator tables: replace: true - source: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_16-01-40_907_1655518346126745740/10000 + source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-10_17-39-31_582_9166944613001622799/10000 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -327,20 +339,20 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/dest_j1 + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/dest_j1 name dest_j1 serialization.ddl struct 
dest_j1 { string key, string value, string val2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266451300 + transient_lastDdlTime 1268271571 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: dest_j1 - tmp directory: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_16-01-40_907_1655518346126745740/10001 + tmp directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-10_17-39-31_582_9166944613001622799/10001 - Stage: Stage-2 + Stage: Stage-3 Map Reduce Alias -> Map Operator Tree: - file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_16-01-40_907_1655518346126745740/10002 + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-10_17-39-31_582_9166944613001622799/10003 Reduce Output Operator sort order: Map-reduce partition columns: @@ -356,11 +368,11 @@ type: string Needs Tagging: false Path -> Alias: - file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_16-01-40_907_1655518346126745740/10002 [file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_16-01-40_907_1655518346126745740/10002] + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-10_17-39-31_582_9166944613001622799/10003 [file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-10_17-39-31_582_9166944613001622799/10003] Path -> Partition: - file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_16-01-40_907_1655518346126745740/10002 + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-10_17-39-31_582_9166944613001622799/10003 Partition - base file name: 10002 + base file name: 10003 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -369,12 +381,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/dest_j1 + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/dest_j1 name dest_j1 serialization.ddl struct dest_j1 { string key, string value, string val2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266451300 + transient_lastDdlTime 1268271571 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -385,12 +397,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/dest_j1 + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/dest_j1 name dest_j1 serialization.ddl struct dest_j1 { string key, string value, string val2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266451300 + transient_lastDdlTime 1268271571 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: dest_j1 name: dest_j1 @@ -399,7 +411,7 @@ File Output Operator compressed: false GlobalTableId: 0 - directory: 
file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_16-01-40_907_1655518346126745740/10000 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-10_17-39-31_582_9166944613001622799/10000 NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -410,12 +422,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/dest_j1 + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/dest_j1 name dest_j1 serialization.ddl struct dest_j1 { string key, string value, string val2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266451300 + transient_lastDdlTime 1268271571 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: dest_j1 TotalFiles: 1 @@ -443,11 +455,11 @@ PREHOOK: query: select * from dest_j1 x order by x.key PREHOOK: type: QUERY PREHOOK: Input: default@dest_j1 -PREHOOK: Output: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_16-01-50_498_3146515945044022967/10000 +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-10_17-39-43_999_3902245860591586653/10000 POSTHOOK: query: select * from dest_j1 x order by x.key POSTHOOK: type: QUERY POSTHOOK: Input: default@dest_j1 -POSTHOOK: Output: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_16-01-50_498_3146515945044022967/10000 +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-10_17-39-43_999_3902245860591586653/10000 146 val_146 val_146 146 val_146 val_146 146 val_146 val_146 Index: ql/src/test/results/clientpositive/smb_subquery.q.out =================================================================== --- ql/src/test/results/clientpositive/smb_subquery.q.out (revision 0) +++ ql/src/test/results/clientpositive/smb_subquery.q.out (revision 0) @@ -0,0 +1,1241 @@ +PREHOOK: query: drop table bucketmapjoin6_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table bucketmapjoin6_1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE bucketmapjoin6_1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE bucketmapjoin6_1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@bucketmapjoin6_1 +PREHOOK: query: drop table bucketmapjoin6_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table bucketmapjoin6_2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE bucketmapjoin6_2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE bucketmapjoin6_2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@bucketmapjoin6_2 +PREHOOK: query: drop table bucketmapjoin6_3 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table bucketmapjoin6_3 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE bucketmapjoin6_3(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE bucketmapjoin6_3(key int, value string) CLUSTERED BY (key) SORTED 
BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@bucketmapjoin6_3 +PREHOOK: query: insert overwrite table bucketmapjoin6_1 +select * from src where key < 100 and key != 44 and key != 28 and key != 15 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@bucketmapjoin6_1 +POSTHOOK: query: insert overwrite table bucketmapjoin6_1 +select * from src where key < 100 and key != 44 and key != 28 and key != 15 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@bucketmapjoin6_1 +PREHOOK: query: select * from bucketmapjoin6_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin6_1 +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-09_09-56-46_852_6740815296710129403/10000 +POSTHOOK: query: select * from bucketmapjoin6_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin6_1 +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-09_09-56-46_852_6740815296710129403/10000 +0 val_0 +0 val_0 +0 val_0 +2 val_2 +4 val_4 +5 val_5 +5 val_5 +5 val_5 +8 val_8 +9 val_9 +10 val_10 +11 val_11 +12 val_12 +12 val_12 +17 val_17 +18 val_18 +18 val_18 +19 val_19 +20 val_20 +24 val_24 +24 val_24 +26 val_26 +26 val_26 +27 val_27 +30 val_30 +33 val_33 +34 val_34 +35 val_35 +35 val_35 +35 val_35 +37 val_37 +37 val_37 +41 val_41 +42 val_42 +42 val_42 +43 val_43 +47 val_47 +51 val_51 +51 val_51 +53 val_53 +54 val_54 +57 val_57 +58 val_58 +58 val_58 +64 val_64 +65 val_65 +66 val_66 +67 val_67 +67 val_67 +69 val_69 +70 val_70 +70 val_70 +70 val_70 +72 val_72 +72 val_72 +74 val_74 +76 val_76 +76 val_76 +77 val_77 +78 val_78 +80 val_80 +82 val_82 +83 val_83 +83 val_83 +84 val_84 +84 val_84 +85 val_85 +86 val_86 +87 val_87 +90 val_90 +90 val_90 +90 val_90 +92 val_92 +95 val_95 +95 val_95 +96 val_96 +97 val_97 +97 val_97 +98 val_98 +98 val_98 +PREHOOK: query: insert overwrite table bucketmapjoin6_2 +select * from src where key > 20 and key < 50 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@bucketmapjoin6_2 +POSTHOOK: query: insert overwrite table bucketmapjoin6_2 +select * from src where key > 20 and key < 50 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@bucketmapjoin6_2 +PREHOOK: query: select * from bucketmapjoin6_2 +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin6_2 +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-09_09-56-51_349_8523171024526064482/10000 +POSTHOOK: query: select * from bucketmapjoin6_2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin6_2 +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-09_09-56-51_349_8523171024526064482/10000 +24 val_24 +24 val_24 +26 val_26 +26 val_26 +27 val_27 +28 val_28 +30 val_30 +33 val_33 +34 val_34 +35 val_35 +35 val_35 +35 val_35 +37 val_37 +37 val_37 +41 val_41 +42 val_42 +42 val_42 +43 val_43 +44 val_44 +47 val_47 +PREHOOK: query: insert overwrite table bucketmapjoin6_3 +select * from src where key > 10 and key < 30 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@bucketmapjoin6_3 +POSTHOOK: query: insert overwrite table bucketmapjoin6_3 +select * from src where key > 10 and key < 30 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@bucketmapjoin6_3 +PREHOOK: query: select * from bucketmapjoin6_3 +PREHOOK: type: QUERY 
+PREHOOK: Input: default@bucketmapjoin6_3 +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-09_09-56-55_738_689896018048558029/10000 +POSTHOOK: query: select * from bucketmapjoin6_3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin6_3 +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-09_09-56-55_738_689896018048558029/10000 +11 val_11 +12 val_12 +12 val_12 +15 val_15 +15 val_15 +17 val_17 +18 val_18 +18 val_18 +19 val_19 +20 val_20 +24 val_24 +24 val_24 +26 val_26 +26 val_26 +27 val_27 +28 val_28 +PREHOOK: query: explain +select * from +( +select +CASE +WHEN M.key is not null THEN M.key +ELSE S.key +END as key, +M.value as Mvalue, S.value as Svalue from +(select * from bucketmapjoin6_1 where key <= 20 or key >= 50 ) M full outer join bucketmapjoin6_2 S on +M.key = S.key +) subq2 +full outer join +bucketmapjoin6_3 L +on subq2.key = L.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * from +( +select +CASE +WHEN M.key is not null THEN M.key +ELSE S.key +END as key, +M.value as Mvalue, S.value as Svalue from +(select * from bucketmapjoin6_1 where key <= 20 or key >= 50 ) M full outer join bucketmapjoin6_2 S on +M.key = S.key +) subq2 +full outer join +bucketmapjoin6_3 L +on subq2.key = L.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_FULLOUTERJOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_FULLOUTERJOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF bucketmapjoin6_1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (or (<= (TOK_TABLE_OR_COL key) 20) (>= (TOK_TABLE_OR_COL key) 50))))) M) (TOK_TABREF bucketmapjoin6_2 S) (= (. (TOK_TABLE_OR_COL M) key) (. (TOK_TABLE_OR_COL S) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION WHEN (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL M) key)) (. (TOK_TABLE_OR_COL M) key) (. (TOK_TABLE_OR_COL S) key)) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL M) value) Mvalue) (TOK_SELEXPR (. (TOK_TABLE_OR_COL S) value) Svalue)))) subq2) (TOK_TABREF bucketmapjoin6_3 L) (= (. (TOK_TABLE_OR_COL subq2) key) (. 
(TOK_TABLE_OR_COL L) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + subq2:m:bucketmapjoin6_1 + TableScan + alias: bucketmapjoin6_1 + Filter Operator + predicate: + expr: ((key <= 20) or (key >= 50)) + type: boolean + Filter Operator + predicate: + expr: ((key <= 20) or (key >= 50)) + type: boolean + Select Operator + expressions: + expr: key + type: int + expr: value + type: string + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: int + sort order: + + Map-reduce partition columns: + expr: _col0 + type: int + tag: 0 + value expressions: + expr: _col0 + type: int + expr: _col1 + type: string + subq2:s + TableScan + alias: s + Reduce Output Operator + key expressions: + expr: key + type: int + sort order: + + Map-reduce partition columns: + expr: key + type: int + tag: 1 + value expressions: + expr: key + type: int + expr: value + type: string + Reduce Operator Tree: + Join Operator + condition map: + Outer Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} {VALUE._col1} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: + expr: CASE WHEN (_col0 is not null) THEN (_col0) ELSE (_col2) END + type: int + expr: _col1 + type: string + expr: _col3 + type: string + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + Reduce Output Operator + key expressions: + expr: _col0 + type: int + sort order: + + Map-reduce partition columns: + expr: _col0 + type: int + tag: 0 + value expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col2 + type: string + l + TableScan + alias: l + Reduce Output Operator + key expressions: + expr: key + type: int + sort order: + + Map-reduce partition columns: + expr: key + type: int + tag: 1 + value expressions: + expr: key + type: int + expr: value + type: string + Reduce Operator Tree: + Join Operator + condition map: + Outer Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} + 1 {VALUE._col0} {VALUE._col1} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: int + expr: _col4 + type: string + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select * from +( +select +CASE +WHEN M.key is not null THEN M.key +ELSE S.key +END as key, +M.value as Mvalue, S.value as Svalue from +(select * from bucketmapjoin6_1 where key <= 20 or key >= 50 ) M full outer join bucketmapjoin6_2 S on +M.key = S.key +) subq2 +full outer join +bucketmapjoin6_3 L +on subq2.key = L.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin6_1 +PREHOOK: Input: 
default@bucketmapjoin6_3 +PREHOOK: Input: default@bucketmapjoin6_2 +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-09_09-56-56_551_293111223676248499/10000 +POSTHOOK: query: select * from +( +select +CASE +WHEN M.key is not null THEN M.key +ELSE S.key +END as key, +M.value as Mvalue, S.value as Svalue from +(select * from bucketmapjoin6_1 where key <= 20 or key >= 50 ) M full outer join bucketmapjoin6_2 S on +M.key = S.key +) subq2 +full outer join +bucketmapjoin6_3 L +on subq2.key = L.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin6_1 +POSTHOOK: Input: default@bucketmapjoin6_3 +POSTHOOK: Input: default@bucketmapjoin6_2 +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-09_09-56-56_551_293111223676248499/10000 +0 val_0 NULL NULL NULL +0 val_0 NULL NULL NULL +0 val_0 NULL NULL NULL +2 val_2 NULL NULL NULL +4 val_4 NULL NULL NULL +5 val_5 NULL NULL NULL +5 val_5 NULL NULL NULL +5 val_5 NULL NULL NULL +8 val_8 NULL NULL NULL +9 val_9 NULL NULL NULL +10 val_10 NULL NULL NULL +11 val_11 NULL 11 val_11 +12 val_12 NULL 12 val_12 +12 val_12 NULL 12 val_12 +12 val_12 NULL 12 val_12 +12 val_12 NULL 12 val_12 +NULL NULL NULL 15 val_15 +NULL NULL NULL 15 val_15 +17 val_17 NULL 17 val_17 +18 val_18 NULL 18 val_18 +18 val_18 NULL 18 val_18 +18 val_18 NULL 18 val_18 +18 val_18 NULL 18 val_18 +19 val_19 NULL 19 val_19 +20 val_20 NULL 20 val_20 +24 NULL val_24 24 val_24 +24 NULL val_24 24 val_24 +24 NULL val_24 24 val_24 +24 NULL val_24 24 val_24 +26 NULL val_26 26 val_26 +26 NULL val_26 26 val_26 +26 NULL val_26 26 val_26 +26 NULL val_26 26 val_26 +27 NULL val_27 27 val_27 +28 NULL val_28 28 val_28 +30 NULL val_30 NULL NULL +33 NULL val_33 NULL NULL +34 NULL val_34 NULL NULL +35 NULL val_35 NULL NULL +35 NULL val_35 NULL NULL +35 NULL val_35 NULL NULL +37 NULL val_37 NULL NULL +37 NULL val_37 NULL NULL +41 NULL val_41 NULL NULL +42 NULL val_42 NULL NULL +42 NULL val_42 NULL NULL +43 NULL val_43 NULL NULL +44 NULL val_44 NULL NULL +47 NULL val_47 NULL NULL +51 val_51 NULL NULL NULL +51 val_51 NULL NULL NULL +53 val_53 NULL NULL NULL +54 val_54 NULL NULL NULL +57 val_57 NULL NULL NULL +58 val_58 NULL NULL NULL +58 val_58 NULL NULL NULL +64 val_64 NULL NULL NULL +65 val_65 NULL NULL NULL +66 val_66 NULL NULL NULL +67 val_67 NULL NULL NULL +67 val_67 NULL NULL NULL +69 val_69 NULL NULL NULL +70 val_70 NULL NULL NULL +70 val_70 NULL NULL NULL +70 val_70 NULL NULL NULL +72 val_72 NULL NULL NULL +72 val_72 NULL NULL NULL +74 val_74 NULL NULL NULL +76 val_76 NULL NULL NULL +76 val_76 NULL NULL NULL +77 val_77 NULL NULL NULL +78 val_78 NULL NULL NULL +80 val_80 NULL NULL NULL +82 val_82 NULL NULL NULL +83 val_83 NULL NULL NULL +83 val_83 NULL NULL NULL +84 val_84 NULL NULL NULL +84 val_84 NULL NULL NULL +85 val_85 NULL NULL NULL +86 val_86 NULL NULL NULL +87 val_87 NULL NULL NULL +90 val_90 NULL NULL NULL +90 val_90 NULL NULL NULL +90 val_90 NULL NULL NULL +92 val_92 NULL NULL NULL +95 val_95 NULL NULL NULL +95 val_95 NULL NULL NULL +96 val_96 NULL NULL NULL +97 val_97 NULL NULL NULL +97 val_97 NULL NULL NULL +98 val_98 NULL NULL NULL +98 val_98 NULL NULL NULL +PREHOOK: query: explain +select /*+ MAPJOIN(L) */ * from +( +select /*+ MAPJOIN(S) */ +CASE +WHEN M.key is not null THEN M.key +ELSE S.key +END as key, +M.value as Mvalue, S.value as Svalue from +(select * from bucketmapjoin6_1 where key <= 20 and key >= 50 ) M full outer join bucketmapjoin6_2 S on +M.key = S.key +) subq2 +full 
outer join +bucketmapjoin6_3 L +on subq2.key = L.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+ MAPJOIN(L) */ * from +( +select /*+ MAPJOIN(S) */ +CASE +WHEN M.key is not null THEN M.key +ELSE S.key +END as key, +M.value as Mvalue, S.value as Svalue from +(select * from bucketmapjoin6_1 where key <= 20 and key >= 50 ) M full outer join bucketmapjoin6_2 S on +M.key = S.key +) subq2 +full outer join +bucketmapjoin6_3 L +on subq2.key = L.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_FULLOUTERJOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_FULLOUTERJOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF bucketmapjoin6_1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (<= (TOK_TABLE_OR_COL key) 20) (>= (TOK_TABLE_OR_COL key) 50))))) M) (TOK_TABREF bucketmapjoin6_2 S) (= (. (TOK_TABLE_OR_COL M) key) (. (TOK_TABLE_OR_COL S) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST S))) (TOK_SELEXPR (TOK_FUNCTION WHEN (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL M) key)) (. (TOK_TABLE_OR_COL M) key) (. (TOK_TABLE_OR_COL S) key)) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL M) value) Mvalue) (TOK_SELEXPR (. (TOK_TABLE_OR_COL S) value) Svalue)))) subq2) (TOK_TABREF bucketmapjoin6_3 L) (= (. (TOK_TABLE_OR_COL subq2) key) (. (TOK_TABLE_OR_COL L) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST L))) (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + subq2:m:bucketmapjoin6_1 + TableScan + alias: bucketmapjoin6_1 + Filter Operator + predicate: + expr: ((key <= 20) and (key >= 50)) + type: boolean + Filter Operator + predicate: + expr: ((key <= 20) and (key >= 50)) + type: boolean + Select Operator + expressions: + expr: key + type: int + expr: value + type: string + outputColumnNames: _col0, _col1 + Sorted Merge Bucket Map Join Operator + condition map: + Outer Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {key} {value} + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[key]] + outputColumnNames: _col0, _col1, _col2, _col3 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col2 + type: int + expr: _col3 + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: + expr: CASE WHEN (_col0 is not null) THEN (_col0) ELSE (_col2) END + type: int + expr: _col1 + type: string + expr: _col3 + type: string + outputColumnNames: _col0, _col1, _col2 + Sorted Merge Bucket Map Join Operator + condition map: + Outer Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col2} + 1 {key} {value} + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[key]] + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: int + expr: _col4 + type: string + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: int + expr: _col4 + type: string + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + File Output 
Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select /*+ MAPJOIN(L) */ * from +( +select /*+ MAPJOIN(S) */ +CASE +WHEN M.key is not null THEN M.key +ELSE S.key +END as key, +M.value as Mvalue, S.value as Svalue from +(select * from bucketmapjoin6_1 where key <= 20 or key >= 50 ) M full outer join bucketmapjoin6_2 S on +M.key = S.key +) subq2 +full outer join +bucketmapjoin6_3 L +on subq2.key = L.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin6_1 +PREHOOK: Input: default@bucketmapjoin6_3 +PREHOOK: Input: default@bucketmapjoin6_2 +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-09_09-57-10_108_3324424854665670423/10000 +POSTHOOK: query: select /*+ MAPJOIN(L) */ * from +( +select /*+ MAPJOIN(S) */ +CASE +WHEN M.key is not null THEN M.key +ELSE S.key +END as key, +M.value as Mvalue, S.value as Svalue from +(select * from bucketmapjoin6_1 where key <= 20 or key >= 50 ) M full outer join bucketmapjoin6_2 S on +M.key = S.key +) subq2 +full outer join +bucketmapjoin6_3 L +on subq2.key = L.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin6_1 +POSTHOOK: Input: default@bucketmapjoin6_3 +POSTHOOK: Input: default@bucketmapjoin6_2 +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-09_09-57-10_108_3324424854665670423/10000 +0 val_0 NULL NULL NULL +0 val_0 NULL NULL NULL +0 val_0 NULL NULL NULL +2 val_2 NULL NULL NULL +4 val_4 NULL NULL NULL +5 val_5 NULL NULL NULL +5 val_5 NULL NULL NULL +5 val_5 NULL NULL NULL +8 val_8 NULL NULL NULL +9 val_9 NULL NULL NULL +10 val_10 NULL NULL NULL +11 val_11 NULL 11 val_11 +12 val_12 NULL 12 val_12 +12 val_12 NULL 12 val_12 +12 val_12 NULL 12 val_12 +12 val_12 NULL 12 val_12 +NULL NULL NULL 15 val_15 +NULL NULL NULL 15 val_15 +17 val_17 NULL 17 val_17 +18 val_18 NULL 18 val_18 +18 val_18 NULL 18 val_18 +18 val_18 NULL 18 val_18 +18 val_18 NULL 18 val_18 +19 val_19 NULL 19 val_19 +20 val_20 NULL 20 val_20 +24 NULL val_24 24 val_24 +24 NULL val_24 24 val_24 +24 NULL val_24 24 val_24 +24 NULL val_24 24 val_24 +26 NULL val_26 26 val_26 +26 NULL val_26 26 val_26 +26 NULL val_26 26 val_26 +26 NULL val_26 26 val_26 +27 NULL val_27 27 val_27 +28 NULL val_28 28 val_28 +30 NULL val_30 NULL NULL +33 NULL val_33 NULL NULL +34 NULL val_34 NULL NULL +35 NULL val_35 NULL NULL +35 NULL val_35 NULL NULL +35 NULL val_35 NULL NULL +37 NULL val_37 NULL NULL +37 NULL val_37 NULL NULL +41 NULL val_41 NULL NULL +42 NULL val_42 NULL NULL +42 NULL val_42 NULL NULL +43 NULL val_43 NULL NULL +44 NULL val_44 NULL NULL +47 NULL val_47 NULL NULL +51 val_51 NULL NULL NULL +51 val_51 NULL NULL NULL +53 val_53 NULL NULL NULL +54 val_54 NULL NULL NULL +57 val_57 NULL NULL NULL +58 val_58 NULL NULL NULL +58 val_58 NULL NULL NULL +64 val_64 NULL NULL NULL +65 val_65 NULL NULL NULL +66 val_66 NULL NULL NULL +67 val_67 NULL NULL NULL +67 val_67 NULL NULL NULL +69 val_69 NULL NULL NULL +70 val_70 NULL NULL NULL +70 val_70 NULL NULL NULL +70 val_70 NULL NULL NULL +72 val_72 NULL NULL NULL +72 val_72 NULL NULL NULL +74 val_74 NULL NULL NULL +76 val_76 NULL NULL NULL +76 val_76 NULL NULL NULL +77 val_77 NULL NULL NULL +78 val_78 NULL NULL NULL +80 val_80 NULL NULL NULL +82 val_82 NULL NULL NULL +83 val_83 NULL NULL NULL +83 val_83 NULL NULL NULL +84 val_84 
NULL NULL NULL +84 val_84 NULL NULL NULL +85 val_85 NULL NULL NULL +86 val_86 NULL NULL NULL +87 val_87 NULL NULL NULL +90 val_90 NULL NULL NULL +90 val_90 NULL NULL NULL +90 val_90 NULL NULL NULL +92 val_92 NULL NULL NULL +95 val_95 NULL NULL NULL +95 val_95 NULL NULL NULL +96 val_96 NULL NULL NULL +97 val_97 NULL NULL NULL +97 val_97 NULL NULL NULL +98 val_98 NULL NULL NULL +98 val_98 NULL NULL NULL +PREHOOK: query: explain +select /*+ MAPJOIN(L) */ * from bucketmapjoin6_3 L +full outer join +( +select /*+ MAPJOIN(S) */ +CASE +WHEN M.key is not null THEN M.key +ELSE S.key +END as key, +M.value as Mvalue, S.value as Svalue from +(select * from bucketmapjoin6_1 where key <= 20 and key >= 50 ) M full outer join bucketmapjoin6_2 S on +M.key = S.key +) subq2 +on subq2.key = L.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+ MAPJOIN(L) */ * from bucketmapjoin6_3 L +full outer join +( +select /*+ MAPJOIN(S) */ +CASE +WHEN M.key is not null THEN M.key +ELSE S.key +END as key, +M.value as Mvalue, S.value as Svalue from +(select * from bucketmapjoin6_1 where key <= 20 and key >= 50 ) M full outer join bucketmapjoin6_2 S on +M.key = S.key +) subq2 +on subq2.key = L.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_FULLOUTERJOIN (TOK_TABREF bucketmapjoin6_3 L) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_FULLOUTERJOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF bucketmapjoin6_1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (<= (TOK_TABLE_OR_COL key) 20) (>= (TOK_TABLE_OR_COL key) 50))))) M) (TOK_TABREF bucketmapjoin6_2 S) (= (. (TOK_TABLE_OR_COL M) key) (. (TOK_TABLE_OR_COL S) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST S))) (TOK_SELEXPR (TOK_FUNCTION WHEN (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL M) key)) (. (TOK_TABLE_OR_COL M) key) (. (TOK_TABLE_OR_COL S) key)) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL M) value) Mvalue) (TOK_SELEXPR (. (TOK_TABLE_OR_COL S) value) Svalue)))) subq2) (= (. (TOK_TABLE_OR_COL subq2) key) (. 
(TOK_TABLE_OR_COL L) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST L))) (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + subq2:m:bucketmapjoin6_1 + TableScan + alias: bucketmapjoin6_1 + Filter Operator + predicate: + expr: ((key <= 20) and (key >= 50)) + type: boolean + Filter Operator + predicate: + expr: ((key <= 20) and (key >= 50)) + type: boolean + Select Operator + expressions: + expr: key + type: int + expr: value + type: string + outputColumnNames: _col0, _col1 + Sorted Merge Bucket Map Join Operator + condition map: + Outer Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {key} {value} + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[key]] + outputColumnNames: _col0, _col1, _col2, _col3 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col2 + type: int + expr: _col3 + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: + expr: CASE WHEN (_col0 is not null) THEN (_col0) ELSE (_col2) END + type: int + expr: _col1 + type: string + expr: _col3 + type: string + outputColumnNames: _col0, _col1, _col2 + Sorted Merge Bucket Map Join Operator + condition map: + Outer Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {_col0} {_col1} {_col2} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[_col0]] + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Position of Big Table: 1 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col2 + type: int + expr: _col3 + type: string + expr: _col4 + type: string + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col2 + type: int + expr: _col3 + type: string + expr: _col4 + type: string + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select /*+ MAPJOIN(L) */ * from bucketmapjoin6_3 L +full outer join +( +select /*+ MAPJOIN(S) */ +CASE +WHEN M.key is not null THEN M.key +ELSE S.key +END as key, +M.value as Mvalue, S.value as Svalue from +(select * from bucketmapjoin6_1 where key <= 20 and key >= 50 ) M full outer join bucketmapjoin6_2 S on +M.key = S.key +) subq2 +on subq2.key = L.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin6_1 +PREHOOK: Input: default@bucketmapjoin6_3 +PREHOOK: Input: default@bucketmapjoin6_2 +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-09_09-57-16_105_2896988082407948631/10000 +POSTHOOK: query: select /*+ MAPJOIN(L) */ * from bucketmapjoin6_3 L +full outer join +( +select /*+ MAPJOIN(S) */ +CASE +WHEN M.key is not null THEN M.key +ELSE S.key +END as key, +M.value as Mvalue, S.value as Svalue from +(select * from bucketmapjoin6_1 where key <= 20 and key >= 50 ) M full outer join bucketmapjoin6_2 S on +M.key = S.key +) subq2 +on subq2.key = L.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin6_1 +POSTHOOK: Input: default@bucketmapjoin6_3 +POSTHOOK: 
Input: default@bucketmapjoin6_2 +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-09_09-57-16_105_2896988082407948631/10000 +PREHOOK: query: select * from +( +select +CASE +WHEN M.key is not null THEN M.key +ELSE S.key +END as key, +M.value as Mvalue, S.value as Svalue from +(select * from bucketmapjoin6_1 where key <= 20 or key >= 50 ) M full outer join bucketmapjoin6_2 S on +M.key = S.key +) subq2 +full outer join +bucketmapjoin6_3 L +on subq2.key = L.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin6_1 +PREHOOK: Input: default@bucketmapjoin6_3 +PREHOOK: Input: default@bucketmapjoin6_2 +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-09_09-57-21_318_9187802803690490140/10000 +POSTHOOK: query: select * from +( +select +CASE +WHEN M.key is not null THEN M.key +ELSE S.key +END as key, +M.value as Mvalue, S.value as Svalue from +(select * from bucketmapjoin6_1 where key <= 20 or key >= 50 ) M full outer join bucketmapjoin6_2 S on +M.key = S.key +) subq2 +full outer join +bucketmapjoin6_3 L +on subq2.key = L.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin6_1 +POSTHOOK: Input: default@bucketmapjoin6_3 +POSTHOOK: Input: default@bucketmapjoin6_2 +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-09_09-57-21_318_9187802803690490140/10000 +0 val_0 NULL NULL NULL +0 val_0 NULL NULL NULL +0 val_0 NULL NULL NULL +2 val_2 NULL NULL NULL +4 val_4 NULL NULL NULL +5 val_5 NULL NULL NULL +5 val_5 NULL NULL NULL +5 val_5 NULL NULL NULL +8 val_8 NULL NULL NULL +9 val_9 NULL NULL NULL +10 val_10 NULL NULL NULL +11 val_11 NULL 11 val_11 +12 val_12 NULL 12 val_12 +12 val_12 NULL 12 val_12 +12 val_12 NULL 12 val_12 +12 val_12 NULL 12 val_12 +NULL NULL NULL 15 val_15 +NULL NULL NULL 15 val_15 +17 val_17 NULL 17 val_17 +18 val_18 NULL 18 val_18 +18 val_18 NULL 18 val_18 +18 val_18 NULL 18 val_18 +18 val_18 NULL 18 val_18 +19 val_19 NULL 19 val_19 +20 val_20 NULL 20 val_20 +24 NULL val_24 24 val_24 +24 NULL val_24 24 val_24 +24 NULL val_24 24 val_24 +24 NULL val_24 24 val_24 +26 NULL val_26 26 val_26 +26 NULL val_26 26 val_26 +26 NULL val_26 26 val_26 +26 NULL val_26 26 val_26 +27 NULL val_27 27 val_27 +28 NULL val_28 28 val_28 +30 NULL val_30 NULL NULL +33 NULL val_33 NULL NULL +34 NULL val_34 NULL NULL +35 NULL val_35 NULL NULL +35 NULL val_35 NULL NULL +35 NULL val_35 NULL NULL +37 NULL val_37 NULL NULL +37 NULL val_37 NULL NULL +41 NULL val_41 NULL NULL +42 NULL val_42 NULL NULL +42 NULL val_42 NULL NULL +43 NULL val_43 NULL NULL +44 NULL val_44 NULL NULL +47 NULL val_47 NULL NULL +51 val_51 NULL NULL NULL +51 val_51 NULL NULL NULL +53 val_53 NULL NULL NULL +54 val_54 NULL NULL NULL +57 val_57 NULL NULL NULL +58 val_58 NULL NULL NULL +58 val_58 NULL NULL NULL +64 val_64 NULL NULL NULL +65 val_65 NULL NULL NULL +66 val_66 NULL NULL NULL +67 val_67 NULL NULL NULL +67 val_67 NULL NULL NULL +69 val_69 NULL NULL NULL +70 val_70 NULL NULL NULL +70 val_70 NULL NULL NULL +70 val_70 NULL NULL NULL +72 val_72 NULL NULL NULL +72 val_72 NULL NULL NULL +74 val_74 NULL NULL NULL +76 val_76 NULL NULL NULL +76 val_76 NULL NULL NULL +77 val_77 NULL NULL NULL +78 val_78 NULL NULL NULL +80 val_80 NULL NULL NULL +82 val_82 NULL NULL NULL +83 val_83 NULL NULL NULL +83 val_83 NULL NULL NULL +84 val_84 NULL NULL NULL +84 val_84 NULL NULL NULL +85 val_85 NULL NULL NULL +86 val_86 NULL NULL NULL +87 val_87 NULL NULL NULL +90 val_90 NULL 
NULL NULL +90 val_90 NULL NULL NULL +90 val_90 NULL NULL NULL +92 val_92 NULL NULL NULL +95 val_95 NULL NULL NULL +95 val_95 NULL NULL NULL +96 val_96 NULL NULL NULL +97 val_97 NULL NULL NULL +97 val_97 NULL NULL NULL +98 val_98 NULL NULL NULL +98 val_98 NULL NULL NULL +PREHOOK: query: select * from +( +select +CASE +WHEN M.key is not null THEN M.key +ELSE S.key +END as key, +M.value as Mvalue, S.value as Svalue from +(select * from bucketmapjoin6_1 where key <= 20 or key >= 50 ) M full outer join bucketmapjoin6_2 S on +M.key = S.key +) subq2 +full outer join +bucketmapjoin6_3 L +on subq2.key = L.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin6_1 +PREHOOK: Input: default@bucketmapjoin6_3 +PREHOOK: Input: default@bucketmapjoin6_2 +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-09_09-57-31_957_71567799795893651/10000 +POSTHOOK: query: select * from +( +select +CASE +WHEN M.key is not null THEN M.key +ELSE S.key +END as key, +M.value as Mvalue, S.value as Svalue from +(select * from bucketmapjoin6_1 where key <= 20 or key >= 50 ) M full outer join bucketmapjoin6_2 S on +M.key = S.key +) subq2 +full outer join +bucketmapjoin6_3 L +on subq2.key = L.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin6_1 +POSTHOOK: Input: default@bucketmapjoin6_3 +POSTHOOK: Input: default@bucketmapjoin6_2 +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-09_09-57-31_957_71567799795893651/10000 +0 val_0 NULL NULL NULL +0 val_0 NULL NULL NULL +0 val_0 NULL NULL NULL +2 val_2 NULL NULL NULL +4 val_4 NULL NULL NULL +5 val_5 NULL NULL NULL +5 val_5 NULL NULL NULL +5 val_5 NULL NULL NULL +8 val_8 NULL NULL NULL +9 val_9 NULL NULL NULL +10 val_10 NULL NULL NULL +11 val_11 NULL 11 val_11 +12 val_12 NULL 12 val_12 +12 val_12 NULL 12 val_12 +12 val_12 NULL 12 val_12 +12 val_12 NULL 12 val_12 +NULL NULL NULL 15 val_15 +NULL NULL NULL 15 val_15 +17 val_17 NULL 17 val_17 +18 val_18 NULL 18 val_18 +18 val_18 NULL 18 val_18 +18 val_18 NULL 18 val_18 +18 val_18 NULL 18 val_18 +19 val_19 NULL 19 val_19 +20 val_20 NULL 20 val_20 +24 NULL val_24 24 val_24 +24 NULL val_24 24 val_24 +24 NULL val_24 24 val_24 +24 NULL val_24 24 val_24 +26 NULL val_26 26 val_26 +26 NULL val_26 26 val_26 +26 NULL val_26 26 val_26 +26 NULL val_26 26 val_26 +27 NULL val_27 27 val_27 +28 NULL val_28 28 val_28 +30 NULL val_30 NULL NULL +33 NULL val_33 NULL NULL +34 NULL val_34 NULL NULL +35 NULL val_35 NULL NULL +35 NULL val_35 NULL NULL +35 NULL val_35 NULL NULL +37 NULL val_37 NULL NULL +37 NULL val_37 NULL NULL +41 NULL val_41 NULL NULL +42 NULL val_42 NULL NULL +42 NULL val_42 NULL NULL +43 NULL val_43 NULL NULL +44 NULL val_44 NULL NULL +47 NULL val_47 NULL NULL +51 val_51 NULL NULL NULL +51 val_51 NULL NULL NULL +53 val_53 NULL NULL NULL +54 val_54 NULL NULL NULL +57 val_57 NULL NULL NULL +58 val_58 NULL NULL NULL +58 val_58 NULL NULL NULL +64 val_64 NULL NULL NULL +65 val_65 NULL NULL NULL +66 val_66 NULL NULL NULL +67 val_67 NULL NULL NULL +67 val_67 NULL NULL NULL +69 val_69 NULL NULL NULL +70 val_70 NULL NULL NULL +70 val_70 NULL NULL NULL +70 val_70 NULL NULL NULL +72 val_72 NULL NULL NULL +72 val_72 NULL NULL NULL +74 val_74 NULL NULL NULL +76 val_76 NULL NULL NULL +76 val_76 NULL NULL NULL +77 val_77 NULL NULL NULL +78 val_78 NULL NULL NULL +80 val_80 NULL NULL NULL +82 val_82 NULL NULL NULL +83 val_83 NULL NULL NULL +83 val_83 NULL NULL NULL +84 val_84 NULL NULL NULL +84 val_84 NULL NULL NULL +85 
val_85 NULL NULL NULL +86 val_86 NULL NULL NULL +87 val_87 NULL NULL NULL +90 val_90 NULL NULL NULL +90 val_90 NULL NULL NULL +90 val_90 NULL NULL NULL +92 val_92 NULL NULL NULL +95 val_95 NULL NULL NULL +95 val_95 NULL NULL NULL +96 val_96 NULL NULL NULL +97 val_97 NULL NULL NULL +97 val_97 NULL NULL NULL +98 val_98 NULL NULL NULL +98 val_98 NULL NULL NULL +PREHOOK: query: drop table bucketmapjoin6_3 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table bucketmapjoin6_3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: default@bucketmapjoin6_3 +PREHOOK: query: drop table bucketmapjoin6_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table bucketmapjoin6_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: default@bucketmapjoin6_2 +PREHOOK: query: drop table bucketmapjoin6_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table bucketmapjoin6_1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: default@bucketmapjoin6_1 Index: ql/src/test/results/clientpositive/smb_transform.q.out =================================================================== --- ql/src/test/results/clientpositive/smb_transform.q.out (revision 0) +++ ql/src/test/results/clientpositive/smb_transform.q.out (revision 0) @@ -0,0 +1,510 @@ +PREHOOK: query: drop table bucketmapjoin6_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table bucketmapjoin6_1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE bucketmapjoin6_1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE bucketmapjoin6_1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@bucketmapjoin6_1 +PREHOOK: query: drop table bucketmapjoin6_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table bucketmapjoin6_2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE bucketmapjoin6_2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE bucketmapjoin6_2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@bucketmapjoin6_2 +PREHOOK: query: drop table bucketmapjoin6_3 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table bucketmapjoin6_3 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE bucketmapjoin6_3(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE bucketmapjoin6_3(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@bucketmapjoin6_3 +PREHOOK: query: insert overwrite table bucketmapjoin6_1 +select * from src where key < 100 and key != 44 and key != 28 and key != 15 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@bucketmapjoin6_1 +POSTHOOK: query: insert overwrite table bucketmapjoin6_1 +select * from src where key < 100 and key != 44 and key != 28 and key != 15 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@bucketmapjoin6_1 +PREHOOK: query: insert overwrite table bucketmapjoin6_2 +select * from src where key > 20 and key < 50 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@bucketmapjoin6_2 +POSTHOOK: query: insert overwrite table bucketmapjoin6_2 +select * from src where key > 20 and key < 50 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@bucketmapjoin6_2 +PREHOOK: query: 
insert overwrite table bucketmapjoin6_3 +select * from src where key > 10 and key < 30 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@bucketmapjoin6_3 +POSTHOOK: query: insert overwrite table bucketmapjoin6_3 +select * from src where key > 10 and key < 30 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@bucketmapjoin6_3 +PREHOOK: query: explain +select /*+ MAPJOIN(L) */ * from +( +select /*+ MAPJOIN(S) */ +S.key as key, +M.value as Mvalue, S.value as Svalue from +(select TRANSFORM(*) USING 'cat' from bucketmapjoin6_1 where key <= 20 and key >= 50 ) M full outer join bucketmapjoin6_2 S on +M.key = S.key +) subq2 +full outer join +bucketmapjoin6_3 L +on subq2.key = L.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+ MAPJOIN(L) */ * from +( +select /*+ MAPJOIN(S) */ +S.key as key, +M.value as Mvalue, S.value as Svalue from +(select TRANSFORM(*) USING 'cat' from bucketmapjoin6_1 where key <= 20 and key >= 50 ) M full outer join bucketmapjoin6_2 S on +M.key = S.key +) subq2 +full outer join +bucketmapjoin6_3 L +on subq2.key = L.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_FULLOUTERJOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_FULLOUTERJOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF bucketmapjoin6_1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST TOK_ALLCOLREF) TOK_SERDE TOK_RECORDWRITER 'cat' TOK_SERDE TOK_RECORDREADER))) (TOK_WHERE (and (<= (TOK_TABLE_OR_COL key) 20) (>= (TOK_TABLE_OR_COL key) 50))))) M) (TOK_TABREF bucketmapjoin6_2 S) (= (. (TOK_TABLE_OR_COL M) key) (. (TOK_TABLE_OR_COL S) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST S))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL S) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL M) value) Mvalue) (TOK_SELEXPR (. (TOK_TABLE_OR_COL S) value) Svalue)))) subq2) (TOK_TABREF bucketmapjoin6_3 L) (= (. (TOK_TABLE_OR_COL subq2) key) (. 
(TOK_TABLE_OR_COL L) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST L))) (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + subq2:m:bucketmapjoin6_1 + TableScan + alias: bucketmapjoin6_1 + Filter Operator + predicate: + expr: ((key <= 20) and (key >= 50)) + type: boolean + Filter Operator + predicate: + expr: ((key <= 20) and (key >= 50)) + type: boolean + Select Operator + expressions: + expr: key + type: int + expr: value + type: string + outputColumnNames: _col0, _col1 + Transform Operator + command: cat + output info: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + Sorted Merge Bucket Map Join Operator + condition map: + Outer Join 0 to 1 + condition expressions: + 0 {_col1} + 1 {key} {value} + handleSkewJoin: false + keys: + 0 [class org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge(Column[_col0]()] + 1 [class org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge(Column[key]()] + outputColumnNames: _col1, _col2, _col3 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col1 + type: string + expr: _col2 + type: int + expr: _col3 + type: string + outputColumnNames: _col1, _col2, _col3 + Select Operator + expressions: + expr: _col2 + type: int + expr: _col1 + type: string + expr: _col3 + type: string + outputColumnNames: _col0, _col1, _col2 + Sorted Merge Bucket Map Join Operator + condition map: + Outer Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col2} + 1 {key} {value} + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[key]] + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: int + expr: _col4 + type: string + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: int + expr: _col4 + type: string + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select /*+ MAPJOIN(L) */ * from +( +select /*+ MAPJOIN(S) */ +S.key as key, +M.value as Mvalue, S.value as Svalue from +(select TRANSFORM(*) USING 'cat' from bucketmapjoin6_1 where key <= 20 and key >= 50 ) M full outer join bucketmapjoin6_2 S on +M.key = S.key +) subq2 +full outer join +bucketmapjoin6_3 L +on subq2.key = L.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin6_1 +PREHOOK: Input: default@bucketmapjoin6_3 +PREHOOK: Input: default@bucketmapjoin6_2 +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-10_19-28-26_168_5204041012827443995/10000 +POSTHOOK: query: select /*+ MAPJOIN(L) */ * from +( +select /*+ MAPJOIN(S) */ +S.key as key, +M.value as Mvalue, S.value as Svalue from +(select TRANSFORM(*) USING 'cat' from bucketmapjoin6_1 where key <= 20 and key >= 50 ) M full outer join bucketmapjoin6_2 S on +M.key = S.key +) subq2 +full outer join 
+bucketmapjoin6_3 L +on subq2.key = L.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin6_1 +POSTHOOK: Input: default@bucketmapjoin6_3 +POSTHOOK: Input: default@bucketmapjoin6_2 +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-10_19-28-26_168_5204041012827443995/10000 +PREHOOK: query: explain +select /*+ MAPJOIN(L) */ * from +( +select /*+ MAPJOIN(S) */ +S.key as key, +M.value as Mvalue, S.value as Svalue from +(select TRANSFORM(*) USING 'cat' from bucketmapjoin6_1 where key <= 20 and key >= 50 ) M full outer join bucketmapjoin6_2 S on +M.key = S.key +) subq2 +full outer join +bucketmapjoin6_3 L +on subq2.key = L.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+ MAPJOIN(L) */ * from +( +select /*+ MAPJOIN(S) */ +S.key as key, +M.value as Mvalue, S.value as Svalue from +(select TRANSFORM(*) USING 'cat' from bucketmapjoin6_1 where key <= 20 and key >= 50 ) M full outer join bucketmapjoin6_2 S on +M.key = S.key +) subq2 +full outer join +bucketmapjoin6_3 L +on subq2.key = L.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_FULLOUTERJOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_FULLOUTERJOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF bucketmapjoin6_1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST TOK_ALLCOLREF) TOK_SERDE TOK_RECORDWRITER 'cat' TOK_SERDE TOK_RECORDREADER))) (TOK_WHERE (and (<= (TOK_TABLE_OR_COL key) 20) (>= (TOK_TABLE_OR_COL key) 50))))) M) (TOK_TABREF bucketmapjoin6_2 S) (= (. (TOK_TABLE_OR_COL M) key) (. (TOK_TABLE_OR_COL S) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST S))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL S) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL M) value) Mvalue) (TOK_SELEXPR (. (TOK_TABLE_OR_COL S) value) Svalue)))) subq2) (TOK_TABREF bucketmapjoin6_3 L) (= (. (TOK_TABLE_OR_COL subq2) key) (. 
(TOK_TABLE_OR_COL L) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST L))) (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + subq2:m:bucketmapjoin6_1 + TableScan + alias: bucketmapjoin6_1 + Filter Operator + predicate: + expr: ((key <= 20) and (key >= 50)) + type: boolean + Filter Operator + predicate: + expr: ((key <= 20) and (key >= 50)) + type: boolean + Select Operator + expressions: + expr: key + type: int + expr: value + type: string + outputColumnNames: _col0, _col1 + Transform Operator + command: cat + output info: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + Common Join Operator + condition map: + Outer Join 0 to 1 + condition expressions: + 0 {_col1} + 1 {key} {value} + handleSkewJoin: false + keys: + 0 [class org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge(Column[_col0]()] + 1 [class org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge(Column[key]()] + outputColumnNames: _col1, _col2, _col3 + Position of Big Table: 0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Local Work: + Map Reduce Local Work + Alias -> Map Local Tables: + subq2:s + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + subq2:s + TableScan + alias: s + Common Join Operator + condition map: + Outer Join 0 to 1 + condition expressions: + 0 {_col1} + 1 {key} {value} + handleSkewJoin: false + keys: + 0 [class org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge(Column[_col0]()] + 1 [class org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge(Column[key]()] + outputColumnNames: _col1, _col2, _col3 + Position of Big Table: 0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-10_19-28-31_721_1748518040552823309/10002 + Select Operator + expressions: + expr: _col1 + type: string + expr: _col2 + type: int + expr: _col3 + type: string + outputColumnNames: _col1, _col2, _col3 + Select Operator + expressions: + expr: _col2 + type: int + expr: _col1 + type: string + expr: _col3 + type: string + outputColumnNames: _col0, _col1, _col2 + Common Join Operator + condition map: + Outer Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col2} + 1 {key} {value} + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[key]] + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: int + expr: _col4 + type: string + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: int + expr: _col4 + type: string + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + File Output Operator + compressed: false 
+ GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + Local Work: + Map Reduce Local Work + Alias -> Map Local Tables: + l + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + l + TableScan + alias: l + Common Join Operator + condition map: + Outer Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col2} + 1 {key} {value} + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[key]] + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: int + expr: _col4 + type: string + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: int + expr: _col4 + type: string + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select /*+ MAPJOIN(L) */ * from +( +select /*+ MAPJOIN(S) */ +S.key as key, +M.value as Mvalue, S.value as Svalue from +(select TRANSFORM(*) USING 'cat' from bucketmapjoin6_1 where key <= 20 and key >= 50 ) M full outer join bucketmapjoin6_2 S on +M.key = S.key +) subq2 +full outer join +bucketmapjoin6_3 L +on subq2.key = L.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin6_1 +PREHOOK: Input: default@bucketmapjoin6_3 +PREHOOK: Input: default@bucketmapjoin6_2 +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-10_19-28-32_592_6435023724406882983/10000 +POSTHOOK: query: select /*+ MAPJOIN(L) */ * from +( +select /*+ MAPJOIN(S) */ +S.key as key, +M.value as Mvalue, S.value as Svalue from +(select TRANSFORM(*) USING 'cat' from bucketmapjoin6_1 where key <= 20 and key >= 50 ) M full outer join bucketmapjoin6_2 S on +M.key = S.key +) subq2 +full outer join +bucketmapjoin6_3 L +on subq2.key = L.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin6_1 +POSTHOOK: Input: default@bucketmapjoin6_3 +POSTHOOK: Input: default@bucketmapjoin6_2 +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-10_19-28-32_592_6435023724406882983/10000 +PREHOOK: query: drop table bucketmapjoin6_3 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table bucketmapjoin6_3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: default@bucketmapjoin6_3 +PREHOOK: query: drop table bucketmapjoin6_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table bucketmapjoin6_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: default@bucketmapjoin6_2 +PREHOOK: query: drop table bucketmapjoin6_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table bucketmapjoin6_1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: default@bucketmapjoin6_1
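
Note on the two EXPLAIN outputs for the TRANSFORM query in smb_transform.q.out above: the first plan keeps the Sorted Merge Bucket Map Join Operator in a single map-only stage even though the M side passes through a Transform Operator, while the second plan for the same query falls back to a Common Join Operator with map-local work plus a follow-up Stage-2. A plausible reading is that the test driver toggles the trust flag this patch introduces between the two EXPLAIN statements; the .q file itself is not part of these hunks, so the sketch below of the session settings is an assumption, not a quote from the test:

  set hive.optimize.bucketmapjoin=true;
  set hive.optimize.bucketmapjoin.sortedmerge=true;
  -- assumption: trusting the TRANSFORM script ('cat' here) to preserve
  -- bucketing and sort order lets the SMB map join survive the script
  -- boundary, yielding the first (single-stage) plan above
  set hive.optimize.bucketmapjoin.transform.trust=true;
  explain
  select /*+ MAPJOIN(L) */ * from
  (
  select /*+ MAPJOIN(S) */
  S.key as key,
  M.value as Mvalue, S.value as Svalue from
  (select TRANSFORM(*) USING 'cat' from bucketmapjoin6_1 where key <= 20 and key >= 50 ) M full outer join bucketmapjoin6_2 S on
  M.key = S.key
  ) subq2
  full outer join
  bucketmapjoin6_3 L
  on subq2.key = L.key;

  -- assumption: with the flag disabled, the optimizer no longer assumes
  -- the transformed rows stay bucketed/sorted, so the same query degrades
  -- to the two-stage Common Join plan shown second
  set hive.optimize.bucketmapjoin.transform.trust=false;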