Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ConditionalTask.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/ConditionalTask.java (revision 910755) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ConditionalTask.java (working copy) @@ -85,7 +85,9 @@ console.printInfo(ExecDriver.getJobEndMsg("" + Utilities.randGen.nextInt()) + ", job is filtered out (removed at runtime)."); - driverContext.incCurJobNo(1); + if(tsk.isMapRedTask()) { + driverContext.incCurJobNo(1); + } if (tsk.getChildTasks() != null) { for (Task child : tsk.getChildTasks()) { child.parentTasks.remove(tsk); Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ExecMapper.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/ExecMapper.java (revision 910755) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExecMapper.java (working copy) @@ -254,9 +254,6 @@ String currentInputFile = HiveConf.getVar(jc, HiveConf.ConfVars.HADOOPMAPFILENAME); BucketMapJoinContext bucketMatcherCxt = this.localWork.getBucketMapjoinContext(); Class bucketMatcherCls = bucketMatcherCxt.getBucketMatcherClass(); - if(bucketMatcherCls == null) { - bucketMatcherCls = org.apache.hadoop.hive.ql.exec.DefaultBucketMatcher.class; - } BucketMatcher bucketMatcher = (BucketMatcher) ReflectionUtils.newInstance(bucketMatcherCls, null); bucketMatcher.setAliasBucketFileNameMapping(bucketMatcherCxt.getAliasBucketFileNameMapping()); List aliasFiles = bucketMatcher.getAliasBucketFiles(currentInputFile, Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketMapJoinOptimizer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketMapJoinOptimizer.java (revision 910755) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketMapJoinOptimizer.java (working copy) @@ -20,6 +20,7 @@ import java.io.IOException; import java.io.Serializable; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; import java.util.HashSet; import java.util.Iterator; @@ -28,6 +29,7 @@ import java.util.Map; import java.util.Set; import java.util.Stack; +import java.util.Map.Entry; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -174,15 +176,19 @@ } MapJoinDesc mjDecs = mapJoinOp.getConf(); - LinkedHashMap aliasToBucketNumber = new LinkedHashMap(); - LinkedHashMap> aliasToBucketFileNames = new LinkedHashMap>(); + LinkedHashMap aliasToBucketNumberMapping = new LinkedHashMap(); + LinkedHashMap> aliasToBucketFileNamesMapping = new LinkedHashMap>(); // right now this code does not work with "a join b on a.key = b.key and // a.ds = b.ds", where ds is a partition column. It only works with joins // with only one partition presents in each join source tables. Map> topOps = this.pGraphContext.getTopOps(); Map topToTable = this.pGraphContext.getTopToTable(); - List bucketNumbers = new ArrayList(); + // (partition to bucket file names) and (partition to bucket number) for + // the big table; + LinkedHashMap> bigTblPartsToBucketFileNames = new LinkedHashMap>(); + LinkedHashMap bigTblPartsToBucketNumber = new LinkedHashMap(); + for (int index = 0; index < joinAliases.size(); index++) { String alias = joinAliases.get(index); TableScanOperator tso = (TableScanOperator) topOps.get(alias); @@ -200,46 +206,66 @@ } int partNumber = prunedParts.getConfirmedPartns().size() + prunedParts.getUnknownPartns().size(); - if(partNumber > 1) - return null; - - Partition part = null; - Iterator iter = prunedParts.getConfirmedPartns() - .iterator(); - while (iter.hasNext()) - part = iter.next(); - if (part == null) { + + if (partNumber > 1) { + // only allow one partition for small tables + if(alias != baseBigAlias) { + return null; + } + // here is the big table,and we get more than one partitions. + // construct a mapping of (Partition->bucket file names) and + // (Partition -> bucket number) + Iterator iter = prunedParts.getConfirmedPartns() + .iterator(); + while (iter.hasNext()) { + Partition p = iter.next(); + if (!checkBucketColumns(p.getBucketCols(), mjDecs, index)) + return null; + List fileNames = getOnePartitionBucketFileNames(p); + bigTblPartsToBucketFileNames.put(p, fileNames); + bigTblPartsToBucketNumber.put(p, p.getBucketCount()); + } iter = prunedParts.getUnknownPartns().iterator(); - while (iter.hasNext()) + while (iter.hasNext()) { + Partition p = iter.next(); + if (!checkBucketColumns(p.getBucketCols(), mjDecs, index)) + return null; + List fileNames = getOnePartitionBucketFileNames(p); + bigTblPartsToBucketFileNames.put(p, fileNames); + bigTblPartsToBucketNumber.put(p, p.getBucketCount()); + } + // If there are more than one partition for the big + // table,aliasToBucketFileNamesMapping and partsToBucketNumber will + // not contain mappings for the big table. Instead, the mappings are + // contained in bigTblPartsToBucketFileNames and + // bigTblPartsToBucketNumber + + } else { + Partition part = null; + Iterator iter = prunedParts.getConfirmedPartns() + .iterator(); + part = iter.next(); + if (part == null) { + iter = prunedParts.getUnknownPartns().iterator(); part = iter.next(); - } - - assert part != null; - - if (!checkBucketColumns(part.getBucketCols(), mjDecs, index)) - return null; - - Integer num = new Integer(part.getBucketCount()); - aliasToBucketNumber.put(alias, num); - List fileNames = new ArrayList(); - try { - FileSystem fs = FileSystem.get(this.pGraphContext.getConf()); - FileStatus[] files = fs.listStatus(new Path(part.getDataLocation() - .toString())); - if (files != null) { - for (FileStatus file : files) { - fileNames.add(file.getPath().toString()); - } } - } catch (IOException e) { - throw new SemanticException(e); + assert part != null; + Integer num = new Integer(part.getBucketCount()); + aliasToBucketNumberMapping.put(alias, num); + if (!checkBucketColumns(part.getBucketCols(), mjDecs, index)) + return null; + List fileNames = getOnePartitionBucketFileNames(part); + aliasToBucketFileNamesMapping.put(alias, fileNames); + if (alias == baseBigAlias) { + bigTblPartsToBucketFileNames.put(part, fileNames); + bigTblPartsToBucketNumber.put(part, num); + } } - aliasToBucketFileNames.put(alias, fileNames); } else { if (!checkBucketColumns(tbl.getBucketCols(), mjDecs, index)) return null; Integer num = new Integer(tbl.getNumBuckets()); - aliasToBucketNumber.put(alias, num); + aliasToBucketNumberMapping.put(alias, num); List fileNames = new ArrayList(); try { FileSystem fs = FileSystem.get(this.pGraphContext.getConf()); @@ -252,61 +278,144 @@ } catch (IOException e) { throw new SemanticException(e); } - aliasToBucketFileNames.put(alias, fileNames); + aliasToBucketFileNamesMapping.put(alias, fileNames); } } // All tables or partitions are bucketed, and their bucket number is // stored in 'bucketNumbers', we need to check if the number of buckets in // the big table can be divided by no of buckets in small tables. - int bucketNoInBigTbl = aliasToBucketNumber.get(baseBigAlias).intValue(); - Iterator iter = aliasToBucketNumber.values().iterator(); - while(iter.hasNext()) { - int nxt = iter.next().intValue(); - boolean ok = (nxt >= bucketNoInBigTbl) ? nxt % bucketNoInBigTbl == 0 - : bucketNoInBigTbl % nxt == 0; - if(!ok) - return null; + if (bigTblPartsToBucketNumber.size() > 0) { + Iterator> bigTblPartToBucketNumber = bigTblPartsToBucketNumber + .entrySet().iterator(); + while (bigTblPartToBucketNumber.hasNext()) { + int bucketNumberInPart = bigTblPartToBucketNumber.next().getValue(); + if (!checkBucketNumberAgainstBigTable(aliasToBucketNumberMapping, + bucketNumberInPart)) + return null; + } + } else { + int bucketNoInBigTbl = aliasToBucketNumberMapping.get(baseBigAlias).intValue(); + if (!checkBucketNumberAgainstBigTable(aliasToBucketNumberMapping, + bucketNoInBigTbl)) + return null; } + MapJoinDesc desc = mapJoinOp.getConf(); LinkedHashMap>> aliasBucketFileNameMapping = new LinkedHashMap>>(); - int bigTblBucketNum = aliasToBucketNumber.get(baseBigAlias); - Collections.sort(aliasToBucketFileNames.get(baseBigAlias)); + //sort bucket names for the big table + if(bigTblPartsToBucketNumber.size() > 0) { + Collection> bucketNamesAllParts = bigTblPartsToBucketFileNames.values(); + for(List partBucketNames : bucketNamesAllParts) { + Collections.sort(partBucketNames); + } + } else { + Collections.sort(aliasToBucketFileNamesMapping.get(baseBigAlias)); + } + + // go through all small tables and get the mapping from bucket file name + // in the big table to bucket file names in small tables. for (int j = 0; j < joinAliases.size(); j++) { String alias = joinAliases.get(j); if(alias.equals(baseBigAlias)) continue; - Collections.sort(aliasToBucketFileNames.get(alias)); + Collections.sort(aliasToBucketFileNamesMapping.get(alias)); LinkedHashMap> mapping = new LinkedHashMap>(); aliasBucketFileNameMapping.put(alias, mapping); - for(String inputBigTBLBucket : aliasToBucketFileNames.get(baseBigAlias)) { - int smallTblBucketNum = aliasToBucketNumber.get(alias); - ArrayList resultFileNames = new ArrayList(); - int index = aliasToBucketFileNames.get(baseBigAlias).indexOf(inputBigTBLBucket); - if (bigTblBucketNum >= smallTblBucketNum) { - int temp = bigTblBucketNum / smallTblBucketNum; - int toAddSmallIndex = index/temp; - if(toAddSmallIndex < aliasToBucketFileNames.get(alias).size()) { - resultFileNames.add(aliasToBucketFileNames.get(alias).get(toAddSmallIndex)); - } - } else { - int jump = smallTblBucketNum / bigTblBucketNum; - for (int i = index; i < aliasToBucketFileNames.get(alias).size(); i = i + jump) { - if(i <= aliasToBucketFileNames.get(alias).size()) { - resultFileNames.add(aliasToBucketFileNames.get(alias).get(i)); - } - } + + // for each bucket file in big table, get the corresponding bucket file + // name in the small table. + if (bigTblPartsToBucketNumber.size() > 0) { + //more than 1 partition in the big table, do the mapping for each partition + Iterator>> bigTblPartToBucketNames = bigTblPartsToBucketFileNames + .entrySet().iterator(); + Iterator> bigTblPartToBucketNum = bigTblPartsToBucketNumber + .entrySet().iterator(); + while (bigTblPartToBucketNames.hasNext()) { + assert bigTblPartToBucketNum.hasNext(); + int bigTblBucketNum = bigTblPartToBucketNum.next().getValue().intValue(); + List bigTblBucketNameList = bigTblPartToBucketNames.next().getValue(); + fillMapping(baseBigAlias, aliasToBucketNumberMapping, + aliasToBucketFileNamesMapping, alias, mapping, bigTblBucketNum, + bigTblBucketNameList); } - mapping.put(inputBigTBLBucket, resultFileNames); + } else { + List bigTblBucketNameList = aliasToBucketFileNamesMapping.get(baseBigAlias); + int bigTblBucketNum = aliasToBucketNumberMapping.get(baseBigAlias); + fillMapping(baseBigAlias, aliasToBucketNumberMapping, + aliasToBucketFileNamesMapping, alias, mapping, bigTblBucketNum, + bigTblBucketNameList); } } desc.setAliasBucketFileNameMapping(aliasBucketFileNameMapping); desc.setBigTableAlias(baseBigAlias); return null; } + + private void fillMapping(String baseBigAlias, + LinkedHashMap aliasToBucketNumberMapping, + LinkedHashMap> aliasToBucketFileNamesMapping, + String alias, LinkedHashMap> mapping, + int bigTblBucketNum, List bigTblBucketNameList) { + for (int index = 0; index < bigTblBucketNameList.size(); index++) { + String inputBigTBLBucket = bigTblBucketNameList.get(index); + int smallTblBucketNum = aliasToBucketNumberMapping.get(alias); + ArrayList resultFileNames = new ArrayList(); + if (bigTblBucketNum >= smallTblBucketNum) { + // if the big table has more buckets than the current small table, + // use "MOD" to get small table bucket names. For example, if the big + // table has 4 buckets and the small table has 2 buckets, then the + // mapping should be 0->0, 1->1, 2->0, 3->1. + int toAddSmallIndex = index % smallTblBucketNum; + if(toAddSmallIndex < aliasToBucketFileNamesMapping.get(alias).size()) { + resultFileNames.add(aliasToBucketFileNamesMapping.get(alias).get(toAddSmallIndex)); + } + } else { + int jump = smallTblBucketNum / bigTblBucketNum; + for (int i = index; i < aliasToBucketFileNamesMapping.get(alias).size(); i = i + jump) { + if(i <= aliasToBucketFileNamesMapping.get(alias).size()) { + resultFileNames.add(aliasToBucketFileNamesMapping.get(alias).get(i)); + } + } + } + mapping.put(inputBigTBLBucket, resultFileNames); + } + } + + private boolean checkBucketNumberAgainstBigTable( + LinkedHashMap aliasToBucketNumber, + int bucketNumberInPart) { + Iterator iter = aliasToBucketNumber.values().iterator(); + while(iter.hasNext()) { + int nxt = iter.next().intValue(); + boolean ok = (nxt >= bucketNumberInPart) ? nxt % bucketNumberInPart == 0 + : bucketNumberInPart % nxt == 0; + if(!ok) + return false; + } + return true; + } + + private List getOnePartitionBucketFileNames(Partition part) + throws SemanticException { + List fileNames = new ArrayList(); + try { + FileSystem fs = FileSystem.get(this.pGraphContext.getConf()); + FileStatus[] files = fs.listStatus(new Path(part.getDataLocation() + .toString())); + if (files != null) { + for (FileStatus file : files) { + fileNames.add(file.getPath().toString()); + } + } + } catch (IOException e) { + throw new SemanticException(e); + } + return fileNames; + } private boolean checkBucketColumns(List bucketColumns, MapJoinDesc mjDesc, int index) { List keys = mjDesc.getKeys().get((byte)index); Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (revision 910755) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (working copy) @@ -228,6 +228,12 @@ private static void setupBucketMapJoinInfo(MapredWork plan, MapJoinOperator currMapJoinOp) { MapredLocalWork localPlan = plan.getMapLocalWork(); + if (currMapJoinOp != null && localPlan == null) { + localPlan = new MapredLocalWork( + new LinkedHashMap>(), + new LinkedHashMap()); + plan.setMapLocalWork(localPlan); + } if (localPlan != null && currMapJoinOp != null) { LinkedHashMap>> aliasBucketFileNameMapping = currMapJoinOp.getConf().getAliasBucketFileNameMapping(); @@ -237,6 +243,7 @@ bucketMJCxt.setAliasBucketFileNameMapping(aliasBucketFileNameMapping); localPlan.setInputFileChangeSensitive(true); bucketMJCxt.setMapJoinBigTableAlias(currMapJoinOp.getConf().getBigTableAlias()); + bucketMJCxt.setBucketMatcherClass(org.apache.hadoop.hive.ql.exec.DefaultBucketMatcher.class); } } } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java (revision 910755) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java (working copy) @@ -18,10 +18,15 @@ package org.apache.hadoop.hive.ql.plan; +import java.io.File; import java.io.Serializable; +import java.net.URI; import java.util.ArrayList; +import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; +import java.util.Map; +import java.util.Map.Entry; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.BucketMatcher; @@ -84,6 +89,12 @@ this.inputFileChangeSensitive = inputFileChangeSensitive; } + public void deriveExplainAttributes() { + if (bucketMapjoinContext != null) { + bucketMapjoinContext.deriveBucketMapJoinMapping(); + } + } + @Explain(displayName = "Bucket Mapjoin Context", normalExplain = false) public BucketMapJoinContext getBucketMapjoinContext() { return bucketMapjoinContext; @@ -101,11 +112,64 @@ private LinkedHashMap>> aliasBucketFileNameMapping; private String mapJoinBigTableAlias; private Class bucketMatcherClass; + + private LinkedHashMap>> aliasBucketBaseFileNameMapping; public void setMapJoinBigTableAlias(String bigTableAlias) { this.mapJoinBigTableAlias = bigTableAlias; } + + public void deriveBucketMapJoinMapping() { + if (aliasBucketFileNameMapping != null) { + Iterator>>> iter = + aliasBucketFileNameMapping.entrySet().iterator(); + aliasBucketBaseFileNameMapping = new LinkedHashMap>>(); + + while (iter.hasNext()) { + Entry>> old = iter.next(); + + LinkedHashMap> newBucketBaseFileNameMapping = new LinkedHashMap>(); + Iterator>> oldAliasFileNameMappingIter = old.getValue().entrySet().iterator(); + while (oldAliasFileNameMappingIter.hasNext()) { + //For a give table and its bucket full file path list, only keep the base file name (remove file path etc). + //And put the new list into the new mapping. + Entry> oldTableBucketFileNames = oldAliasFileNameMappingIter.next(); + ArrayList oldTableBucketNames = oldTableBucketFileNames.getValue(); + ArrayList newTableBucketFileBaseName = new ArrayList (oldTableBucketNames.size()); + //for each bucket file, only keep its base files and store into a new list. + if (oldTableBucketNames != null) { + for (String bucketFName : oldTableBucketNames) { + newTableBucketFileBaseName.add(getBaseFileName(bucketFName)); + } + } + String bigTblBucketFileName = getBaseFileName(oldTableBucketFileNames.getKey()); + if(newBucketBaseFileNameMapping.containsKey(bigTblBucketFileName)) { + String fullPath = oldTableBucketFileNames.getKey(); + String dir = getBaseFileName(fullPath.substring(0, fullPath.lastIndexOf(bigTblBucketFileName))); + bigTblBucketFileName = dir + File.separator + bigTblBucketFileName; + } + //put the new mapping + newBucketBaseFileNameMapping.put(bigTblBucketFileName, newTableBucketFileBaseName); + } + String tableAlias = old.getKey(); + aliasBucketBaseFileNameMapping.put(tableAlias, newBucketBaseFileNameMapping); + } + } + } + + private String getBaseFileName (String path) { + try { + URI uri = new URI(path); + File file = new File(uri); + return file.getName(); + } catch (Exception ex) { + // This could be due to either URI syntax error or File constructor + // illegal arg; we don't really care which one it is. + return path; + } + } + public String getMapJoinBigTableAlias() { return mapJoinBigTableAlias; } @@ -123,7 +187,7 @@ public LinkedHashMap>> getAliasBucketFileNameMapping() { return aliasBucketFileNameMapping; } - + public void setAliasBucketFileNameMapping( LinkedHashMap>> aliasBucketFileNameMapping) { this.aliasBucketFileNameMapping = aliasBucketFileNameMapping; @@ -135,5 +199,15 @@ else return ""; } + + @Explain(displayName = "Alias Bucket Base File Name Mapping", normalExplain = false) + public LinkedHashMap>> getAliasBucketBaseFileNameMapping() { + return aliasBucketBaseFileNameMapping; + } + + public void setAliasBucketBaseFileNameMapping( + LinkedHashMap>> aliasBucketBaseFileNameMapping) { + this.aliasBucketBaseFileNameMapping = aliasBucketBaseFileNameMapping; + } } } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java (revision 909965) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java (working copy) @@ -270,12 +270,14 @@ * Derive additional attributes to be rendered by EXPLAIN. */ public void deriveExplainAttributes() { - if (pathToPartitionInfo == null) { - return; + if (pathToPartitionInfo != null) { + for (Map.Entry entry : pathToPartitionInfo + .entrySet()) { + entry.getValue().deriveBaseFileName(entry.getKey()); + } } - for (Map.Entry entry : pathToPartitionInfo - .entrySet()) { - entry.getValue().deriveBaseFileName(entry.getKey()); + if (mapLocalWork != null) { + mapLocalWork.deriveExplainAttributes(); } } Index: ql/src/test/queries/clientpositive/bucketmapjoin5.q =================================================================== --- ql/src/test/queries/clientpositive/bucketmapjoin5.q (revision 0) +++ ql/src/test/queries/clientpositive/bucketmapjoin5.q (revision 0) @@ -0,0 +1,54 @@ +CREATE TABLE srcbucket_mapjoin(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin; +load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin; + +CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; +load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); +load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); +load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); +load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); +load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09'); +load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09'); +load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09'); +load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09'); + +CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08'); +load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08'); +load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09'); +load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09'); + + +set hive.optimize.bucketmapjoin = true; +create table bucketmapjoin_tmp_result (key string , value1 string, value2 string); + +explain extended +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key; + +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key; + +select count(1) from bucketmapjoin_tmp_result; + +explain extended +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b +on a.key=b.key; + +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b +on a.key=b.key; + +select count(1) from bucketmapjoin_tmp_result; + +drop table bucketmapjoin_tmp_result; +drop table srcbucket_mapjoin; +drop table srcbucket_mapjoin_part; +drop table srcbucket_mapjoin_part_2; \ No newline at end of file Index: ql/src/test/queries/clientpositive/bucketmapjoin_negative.q =================================================================== --- ql/src/test/queries/clientpositive/bucketmapjoin_negative.q (revision 0) +++ ql/src/test/queries/clientpositive/bucketmapjoin_negative.q (revision 0) @@ -0,0 +1,23 @@ +CREATE TABLE srcbucket_mapjoin(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin; +load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin; + +CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 3 BUCKETS STORED AS TEXTFILE; +load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); +load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); +load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); + + + +set hive.optimize.bucketmapjoin = true; +create table bucketmapjoin_tmp_result (key string , value1 string, value2 string); + +explain extended +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key where b.ds="2008-04-08"; + +drop table bucketmapjoin_tmp_result; +drop table srcbucket_mapjoin; +drop table srcbucket_mapjoin_part; \ No newline at end of file Index: ql/src/test/queries/clientpositive/bucketmapjoin_negative2.q =================================================================== --- ql/src/test/queries/clientpositive/bucketmapjoin_negative2.q (revision 0) +++ ql/src/test/queries/clientpositive/bucketmapjoin_negative2.q (revision 0) @@ -0,0 +1,22 @@ +CREATE TABLE srcbucket_mapjoin(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin; +load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin; + +CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08'); +load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08'); +load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09'); +load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09'); + +set hive.optimize.bucketmapjoin = true; +create table bucketmapjoin_tmp_result (key string , value1 string, value2 string); + +explain extended +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b +on a.key=b.key; + +drop table bucketmapjoin_tmp_result; +drop table srcbucket_mapjoin; +drop table srcbucket_mapjoin_part_2; \ No newline at end of file Index: ql/src/test/results/clientpositive/bucketmapjoin1.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketmapjoin1.q.out (revision 910755) +++ ql/src/test/results/clientpositive/bucketmapjoin1.q.out (working copy) @@ -127,7 +127,7 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-18-33_238_8713741927786752274/10002 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_17-27-54_734_6780901180289995721/10002 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -142,7 +142,7 @@ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266347913 + transient_lastDdlTime 1266456474 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result Local Work: @@ -200,7 +200,7 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-18-33_238_8713741927786752274/10002 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_17-27-54_734_6780901180289995721/10002 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -215,10 +215,12 @@ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266347913 + transient_lastDdlTime 1266456474 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result Bucket Mapjoin Context: + Alias Bucket Base File Name Mapping: + b {srcbucket20.txt=[srcbucket20.txt, srcbucket22.txt], srcbucket21.txt=[srcbucket21.txt, srcbucket23.txt]} Alias Bucket File Name Mapping: b {file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket20.txt, file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket22.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket21.txt, file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket23.txt]} Needs Tagging: false @@ -242,7 +244,7 @@ serialization.ddl struct srcbucket_mapjoin { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266347911 + transient_lastDdlTime 1266456471 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -259,7 +261,7 @@ serialization.ddl struct srcbucket_mapjoin { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266347911 + transient_lastDdlTime 1266456471 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: srcbucket_mapjoin name: srcbucket_mapjoin @@ -271,14 +273,14 @@ Move Operator files: hdfs directory: true - source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-18-33_238_8713741927786752274/10002 - destination: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-18-33_238_8713741927786752274/10000 + source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_17-27-54_734_6780901180289995721/10002 + destination: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_17-27-54_734_6780901180289995721/10000 Stage: Stage-0 Move Operator tables: replace: true - source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-18-33_238_8713741927786752274/10000 + source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_17-27-54_734_6780901180289995721/10000 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -293,15 +295,15 @@ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266347913 + transient_lastDdlTime 1266456474 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result - tmp directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-18-33_238_8713741927786752274/10001 + tmp directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_17-27-54_734_6780901180289995721/10001 Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: - file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-18-33_238_8713741927786752274/10002 + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_17-27-54_734_6780901180289995721/10002 Reduce Output Operator sort order: Map-reduce partition columns: @@ -317,9 +319,9 @@ type: string Needs Tagging: false Path -> Alias: - file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-18-33_238_8713741927786752274/10002 [file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-18-33_238_8713741927786752274/10002] + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_17-27-54_734_6780901180289995721/10002 [file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_17-27-54_734_6780901180289995721/10002] Path -> Partition: - file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-18-33_238_8713741927786752274/10002 + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_17-27-54_734_6780901180289995721/10002 Partition base file name: 10002 input format: org.apache.hadoop.mapred.TextInputFormat @@ -335,7 +337,7 @@ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266347913 + transient_lastDdlTime 1266456474 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -351,7 +353,7 @@ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266347913 + transient_lastDdlTime 1266456474 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result name: bucketmapjoin_tmp_result @@ -360,7 +362,7 @@ File Output Operator compressed: false GlobalTableId: 0 - directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-18-33_238_8713741927786752274/10000 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_17-27-54_734_6780901180289995721/10000 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -375,7 +377,7 @@ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266347913 + transient_lastDdlTime 1266456474 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result @@ -399,11 +401,11 @@ PREHOOK: query: select count(1) from bucketmapjoin_tmp_result PREHOOK: type: QUERY PREHOOK: Input: default@bucketmapjoin_tmp_result -PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-18-53_519_5956091880420865175/10000 +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_17-28-15_357_4624264229498554591/10000 POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result POSTHOOK: type: QUERY POSTHOOK: Input: default@bucketmapjoin_tmp_result -POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-18-53_519_5956091880420865175/10000 +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_17-28-15_357_4624264229498554591/10000 464 PREHOOK: query: explain extended insert overwrite table bucketmapjoin_tmp_result @@ -479,7 +481,7 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-18-59_541_5540791583271613396/10002 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_17-28-21_449_338707100068926956/10002 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -494,7 +496,7 @@ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266347913 + transient_lastDdlTime 1266456474 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result Local Work: @@ -547,7 +549,7 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-18-59_541_5540791583271613396/10002 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_17-28-21_449_338707100068926956/10002 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -562,9 +564,14 @@ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266347913 + transient_lastDdlTime 1266456474 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result + Bucket Mapjoin Context: + Alias Bucket Base File Name Mapping: + a {srcbucket20.txt=[srcbucket20.txt], srcbucket21.txt=[srcbucket21.txt], srcbucket22.txt=[srcbucket20.txt], srcbucket23.txt=[srcbucket21.txt]} + Alias Bucket File Name Mapping: + a {file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket20.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket21.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket22.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket23.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt]} Needs Tagging: false Path -> Alias: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08 [b] @@ -589,7 +596,7 @@ serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266347911 + transient_lastDdlTime 1266456472 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -607,7 +614,7 @@ serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266347911 + transient_lastDdlTime 1266456472 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: srcbucket_mapjoin_part name: srcbucket_mapjoin_part @@ -619,14 +626,14 @@ Move Operator files: hdfs directory: true - source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-18-59_541_5540791583271613396/10002 - destination: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-18-59_541_5540791583271613396/10000 + source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_17-28-21_449_338707100068926956/10002 + destination: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_17-28-21_449_338707100068926956/10000 Stage: Stage-0 Move Operator tables: replace: true - source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-18-59_541_5540791583271613396/10000 + source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_17-28-21_449_338707100068926956/10000 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -641,15 +648,15 @@ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266347913 + transient_lastDdlTime 1266456474 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result - tmp directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-18-59_541_5540791583271613396/10001 + tmp directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_17-28-21_449_338707100068926956/10001 Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: - file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-18-59_541_5540791583271613396/10002 + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_17-28-21_449_338707100068926956/10002 Reduce Output Operator sort order: Map-reduce partition columns: @@ -665,9 +672,9 @@ type: string Needs Tagging: false Path -> Alias: - file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-18-59_541_5540791583271613396/10002 [file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-18-59_541_5540791583271613396/10002] + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_17-28-21_449_338707100068926956/10002 [file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_17-28-21_449_338707100068926956/10002] Path -> Partition: - file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-18-59_541_5540791583271613396/10002 + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_17-28-21_449_338707100068926956/10002 Partition base file name: 10002 input format: org.apache.hadoop.mapred.TextInputFormat @@ -683,7 +690,7 @@ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266347913 + transient_lastDdlTime 1266456474 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -699,7 +706,7 @@ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266347913 + transient_lastDdlTime 1266456474 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result name: bucketmapjoin_tmp_result @@ -708,7 +715,7 @@ File Output Operator compressed: false GlobalTableId: 0 - directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-18-59_541_5540791583271613396/10000 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_17-28-21_449_338707100068926956/10000 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -723,7 +730,7 @@ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266347913 + transient_lastDdlTime 1266456474 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result @@ -747,11 +754,11 @@ PREHOOK: query: select count(1) from bucketmapjoin_tmp_result PREHOOK: type: QUERY PREHOOK: Input: default@bucketmapjoin_tmp_result -PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-19-11_856_1704347966505635966/10000 +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_17-28-33_801_5918516728134744043/10000 POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result POSTHOOK: type: QUERY POSTHOOK: Input: default@bucketmapjoin_tmp_result -POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-19-11_856_1704347966505635966/10000 +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_17-28-33_801_5918516728134744043/10000 464 PREHOOK: query: drop table bucketmapjoin_tmp_result PREHOOK: type: DROPTABLE Index: ql/src/test/results/clientpositive/bucketmapjoin2.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketmapjoin2.q.out (revision 910755) +++ ql/src/test/results/clientpositive/bucketmapjoin2.q.out (working copy) @@ -120,7 +120,7 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-21-31_209_8942375635453959545/10002 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_17-44-51_633_1999172294254156347/10002 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -135,7 +135,7 @@ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266348091 + transient_lastDdlTime 1266371091 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result Local Work: @@ -191,7 +191,7 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-21-31_209_8942375635453959545/10002 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_17-44-51_633_1999172294254156347/10002 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -206,10 +206,12 @@ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266348091 + transient_lastDdlTime 1266371091 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result Bucket Mapjoin Context: + Alias Bucket Base File Name Mapping: + b {srcbucket20.txt=[srcbucket22.txt], srcbucket21.txt=[srcbucket23.txt]} Alias Bucket File Name Mapping: b {file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket22.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket23.txt]} Needs Tagging: false @@ -233,7 +235,7 @@ serialization.ddl struct srcbucket_mapjoin { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266348088 + transient_lastDdlTime 1266371089 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -250,7 +252,7 @@ serialization.ddl struct srcbucket_mapjoin { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266348088 + transient_lastDdlTime 1266371089 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: srcbucket_mapjoin name: srcbucket_mapjoin @@ -262,14 +264,14 @@ Move Operator files: hdfs directory: true - source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-21-31_209_8942375635453959545/10002 - destination: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-21-31_209_8942375635453959545/10000 + source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_17-44-51_633_1999172294254156347/10002 + destination: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_17-44-51_633_1999172294254156347/10000 Stage: Stage-0 Move Operator tables: replace: true - source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-21-31_209_8942375635453959545/10000 + source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_17-44-51_633_1999172294254156347/10000 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -284,15 +286,15 @@ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266348091 + transient_lastDdlTime 1266371091 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result - tmp directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-21-31_209_8942375635453959545/10001 + tmp directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_17-44-51_633_1999172294254156347/10001 Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: - file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-21-31_209_8942375635453959545/10002 + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_17-44-51_633_1999172294254156347/10002 Reduce Output Operator sort order: Map-reduce partition columns: @@ -308,9 +310,9 @@ type: string Needs Tagging: false Path -> Alias: - file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-21-31_209_8942375635453959545/10002 [file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-21-31_209_8942375635453959545/10002] + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_17-44-51_633_1999172294254156347/10002 [file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_17-44-51_633_1999172294254156347/10002] Path -> Partition: - file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-21-31_209_8942375635453959545/10002 + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_17-44-51_633_1999172294254156347/10002 Partition base file name: 10002 input format: org.apache.hadoop.mapred.TextInputFormat @@ -326,7 +328,7 @@ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266348091 + transient_lastDdlTime 1266371091 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -342,7 +344,7 @@ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266348091 + transient_lastDdlTime 1266371091 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result name: bucketmapjoin_tmp_result @@ -351,7 +353,7 @@ File Output Operator compressed: false GlobalTableId: 0 - directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-21-31_209_8942375635453959545/10000 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_17-44-51_633_1999172294254156347/10000 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -366,10 +368,11 @@ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266348091 + transient_lastDdlTime 1266371091 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(b)*/ a.key, a.value, b.value from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b @@ -389,11 +392,11 @@ PREHOOK: query: select count(1) from bucketmapjoin_tmp_result PREHOOK: type: QUERY PREHOOK: Input: default@bucketmapjoin_tmp_result -PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-21-42_329_5595416864594992794/10000 +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_17-45-03_265_8356277554306072279/10000 POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result POSTHOOK: type: QUERY POSTHOOK: Input: default@bucketmapjoin_tmp_result -POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-21-42_329_5595416864594992794/10000 +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_17-45-03_265_8356277554306072279/10000 0 PREHOOK: query: explain extended insert overwrite table bucketmapjoin_tmp_result @@ -467,7 +470,7 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-21-48_339_1875376092387826822/10002 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_17-45-12_472_2273703540554984697/10002 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -482,7 +485,7 @@ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266348091 + transient_lastDdlTime 1266371091 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result Local Work: @@ -528,7 +531,7 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-21-48_339_1875376092387826822/10002 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_17-45-12_472_2273703540554984697/10002 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -543,9 +546,14 @@ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266348091 + transient_lastDdlTime 1266371091 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result + Bucket Mapjoin Context: + Alias Bucket Base File Name Mapping: + a {srcbucket22.txt=[srcbucket20.txt], srcbucket23.txt=[srcbucket21.txt]} + Alias Bucket File Name Mapping: + a {file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket22.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket23.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt]} Needs Tagging: false Path -> Alias: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08 [b] @@ -570,7 +578,7 @@ serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266348090 + transient_lastDdlTime 1266371090 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -588,7 +596,7 @@ serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266348090 + transient_lastDdlTime 1266371090 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: srcbucket_mapjoin_part_2 name: srcbucket_mapjoin_part_2 @@ -600,14 +608,14 @@ Move Operator files: hdfs directory: true - source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-21-48_339_1875376092387826822/10002 - destination: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-21-48_339_1875376092387826822/10000 + source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_17-45-12_472_2273703540554984697/10002 + destination: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_17-45-12_472_2273703540554984697/10000 Stage: Stage-0 Move Operator tables: replace: true - source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-21-48_339_1875376092387826822/10000 + source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_17-45-12_472_2273703540554984697/10000 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -622,15 +630,15 @@ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266348091 + transient_lastDdlTime 1266371091 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result - tmp directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-21-48_339_1875376092387826822/10001 + tmp directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_17-45-12_472_2273703540554984697/10001 Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: - file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-21-48_339_1875376092387826822/10002 + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_17-45-12_472_2273703540554984697/10002 Reduce Output Operator sort order: Map-reduce partition columns: @@ -646,9 +654,9 @@ type: string Needs Tagging: false Path -> Alias: - file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-21-48_339_1875376092387826822/10002 [file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-21-48_339_1875376092387826822/10002] + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_17-45-12_472_2273703540554984697/10002 [file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_17-45-12_472_2273703540554984697/10002] Path -> Partition: - file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-21-48_339_1875376092387826822/10002 + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_17-45-12_472_2273703540554984697/10002 Partition base file name: 10002 input format: org.apache.hadoop.mapred.TextInputFormat @@ -664,7 +672,7 @@ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266348091 + transient_lastDdlTime 1266371091 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -680,7 +688,7 @@ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266348091 + transient_lastDdlTime 1266371091 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result name: bucketmapjoin_tmp_result @@ -689,7 +697,7 @@ File Output Operator compressed: false GlobalTableId: 0 - directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-21-48_339_1875376092387826822/10000 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_17-45-12_472_2273703540554984697/10000 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -704,7 +712,7 @@ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266348091 + transient_lastDdlTime 1266371091 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result @@ -728,11 +736,11 @@ PREHOOK: query: select count(1) from bucketmapjoin_tmp_result PREHOOK: type: QUERY PREHOOK: Input: default@bucketmapjoin_tmp_result -PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-21-57_739_3240200473053997462/10000 +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_17-45-41_244_344319565249197738/10000 POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result POSTHOOK: type: QUERY POSTHOOK: Input: default@bucketmapjoin_tmp_result -POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-21-57_739_3240200473053997462/10000 +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_17-45-41_244_344319565249197738/10000 0 PREHOOK: query: drop table bucketmapjoin_tmp_result PREHOOK: type: DROPTABLE Index: ql/src/test/results/clientpositive/bucketmapjoin3.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketmapjoin3.q.out (revision 910755) +++ ql/src/test/results/clientpositive/bucketmapjoin3.q.out (working copy) @@ -130,7 +130,7 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-25-48_888_596648048761581996/10002 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_18-22-50_595_1909253922065021058/10002 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -145,7 +145,7 @@ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266348348 + transient_lastDdlTime 1266373370 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result Local Work: @@ -201,7 +201,7 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-25-48_888_596648048761581996/10002 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_18-22-50_595_1909253922065021058/10002 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -216,10 +216,12 @@ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266348348 + transient_lastDdlTime 1266373370 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result Bucket Mapjoin Context: + Alias Bucket Base File Name Mapping: + b {srcbucket22.txt=[srcbucket20.txt, srcbucket22.txt], srcbucket23.txt=[srcbucket21.txt, srcbucket23.txt]} Alias Bucket File Name Mapping: b {file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket22.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket20.txt, file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket22.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket23.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket21.txt, file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket23.txt]} Needs Tagging: false @@ -246,7 +248,7 @@ serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266348348 + transient_lastDdlTime 1266373369 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -264,7 +266,7 @@ serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266348348 + transient_lastDdlTime 1266373369 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: srcbucket_mapjoin_part_2 name: srcbucket_mapjoin_part_2 @@ -276,14 +278,14 @@ Move Operator files: hdfs directory: true - source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-25-48_888_596648048761581996/10002 - destination: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-25-48_888_596648048761581996/10000 + source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_18-22-50_595_1909253922065021058/10002 + destination: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_18-22-50_595_1909253922065021058/10000 Stage: Stage-0 Move Operator tables: replace: true - source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-25-48_888_596648048761581996/10000 + source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_18-22-50_595_1909253922065021058/10000 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -298,15 +300,15 @@ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266348348 + transient_lastDdlTime 1266373370 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result - tmp directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-25-48_888_596648048761581996/10001 + tmp directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_18-22-50_595_1909253922065021058/10001 Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: - file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-25-48_888_596648048761581996/10002 + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_18-22-50_595_1909253922065021058/10002 Reduce Output Operator sort order: Map-reduce partition columns: @@ -322,9 +324,9 @@ type: string Needs Tagging: false Path -> Alias: - file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-25-48_888_596648048761581996/10002 [file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-25-48_888_596648048761581996/10002] + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_18-22-50_595_1909253922065021058/10002 [file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_18-22-50_595_1909253922065021058/10002] Path -> Partition: - file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-25-48_888_596648048761581996/10002 + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_18-22-50_595_1909253922065021058/10002 Partition base file name: 10002 input format: org.apache.hadoop.mapred.TextInputFormat @@ -340,7 +342,7 @@ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266348348 + transient_lastDdlTime 1266373370 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -356,7 +358,7 @@ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266348348 + transient_lastDdlTime 1266373370 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result name: bucketmapjoin_tmp_result @@ -365,7 +367,7 @@ File Output Operator compressed: false GlobalTableId: 0 - directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-25-48_888_596648048761581996/10000 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_18-22-50_595_1909253922065021058/10000 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -380,7 +382,7 @@ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266348348 + transient_lastDdlTime 1266373370 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result @@ -404,11 +406,11 @@ PREHOOK: query: select count(1) from bucketmapjoin_tmp_result PREHOOK: type: QUERY PREHOOK: Input: default@bucketmapjoin_tmp_result -PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-26-00_556_812820418823036631/10000 +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_18-23-03_941_6010726807841304647/10000 POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result POSTHOOK: type: QUERY POSTHOOK: Input: default@bucketmapjoin_tmp_result -POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-26-00_556_812820418823036631/10000 +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_18-23-03_941_6010726807841304647/10000 564 PREHOOK: query: explain extended insert overwrite table bucketmapjoin_tmp_result @@ -482,7 +484,7 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-26-05_227_2931936093299864984/10002 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_18-23-08_989_3934503304346533947/10002 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -497,7 +499,7 @@ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266348348 + transient_lastDdlTime 1266373370 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result Local Work: @@ -553,7 +555,7 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-26-05_227_2931936093299864984/10002 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_18-23-08_989_3934503304346533947/10002 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -568,9 +570,14 @@ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266348348 + transient_lastDdlTime 1266373370 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result + Bucket Mapjoin Context: + Alias Bucket Base File Name Mapping: + a {srcbucket20.txt=[srcbucket22.txt], srcbucket21.txt=[srcbucket23.txt], srcbucket22.txt=[srcbucket22.txt], srcbucket23.txt=[srcbucket23.txt]} + Alias Bucket File Name Mapping: + a {file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket20.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket22.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket21.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket23.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket22.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket22.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket23.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket23.txt]} Needs Tagging: false Path -> Alias: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08 [b] @@ -595,7 +602,7 @@ serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266348346 + transient_lastDdlTime 1266373368 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -613,7 +620,7 @@ serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266348346 + transient_lastDdlTime 1266373368 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: srcbucket_mapjoin_part name: srcbucket_mapjoin_part @@ -625,14 +632,14 @@ Move Operator files: hdfs directory: true - source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-26-05_227_2931936093299864984/10002 - destination: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-26-05_227_2931936093299864984/10000 + source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_18-23-08_989_3934503304346533947/10002 + destination: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_18-23-08_989_3934503304346533947/10000 Stage: Stage-0 Move Operator tables: replace: true - source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-26-05_227_2931936093299864984/10000 + source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_18-23-08_989_3934503304346533947/10000 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -647,15 +654,15 @@ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266348348 + transient_lastDdlTime 1266373370 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result - tmp directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-26-05_227_2931936093299864984/10001 + tmp directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_18-23-08_989_3934503304346533947/10001 Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: - file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-26-05_227_2931936093299864984/10002 + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_18-23-08_989_3934503304346533947/10002 Reduce Output Operator sort order: Map-reduce partition columns: @@ -671,9 +678,9 @@ type: string Needs Tagging: false Path -> Alias: - file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-26-05_227_2931936093299864984/10002 [file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-26-05_227_2931936093299864984/10002] + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_18-23-08_989_3934503304346533947/10002 [file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_18-23-08_989_3934503304346533947/10002] Path -> Partition: - file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-26-05_227_2931936093299864984/10002 + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_18-23-08_989_3934503304346533947/10002 Partition base file name: 10002 input format: org.apache.hadoop.mapred.TextInputFormat @@ -689,7 +696,7 @@ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266348348 + transient_lastDdlTime 1266373370 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -705,7 +712,7 @@ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266348348 + transient_lastDdlTime 1266373370 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result name: bucketmapjoin_tmp_result @@ -714,7 +721,7 @@ File Output Operator compressed: false GlobalTableId: 0 - directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-26-05_227_2931936093299864984/10000 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_18-23-08_989_3934503304346533947/10000 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -729,7 +736,7 @@ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266348348 + transient_lastDdlTime 1266373370 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result @@ -753,11 +760,11 @@ PREHOOK: query: select count(1) from bucketmapjoin_tmp_result PREHOOK: type: QUERY PREHOOK: Input: default@bucketmapjoin_tmp_result -PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-26-18_454_3816899584325655687/10000 +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_18-23-21_890_3127085656127868541/10000 POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result POSTHOOK: type: QUERY POSTHOOK: Input: default@bucketmapjoin_tmp_result -POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-26-18_454_3816899584325655687/10000 +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_18-23-21_890_3127085656127868541/10000 564 PREHOOK: query: drop table bucketmapjoin_tmp_result PREHOOK: type: DROPTABLE Index: ql/src/test/results/clientpositive/bucketmapjoin4.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketmapjoin4.q.out (revision 910755) +++ ql/src/test/results/clientpositive/bucketmapjoin4.q.out (working copy) @@ -120,7 +120,7 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-28-34_768_5027569884636052319/10002 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_17-51-28_790_5707462252972969032/10002 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -135,7 +135,7 @@ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266348514 + transient_lastDdlTime 1266371488 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result Local Work: @@ -181,7 +181,7 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-28-34_768_5027569884636052319/10002 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_17-51-28_790_5707462252972969032/10002 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -196,10 +196,12 @@ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266348514 + transient_lastDdlTime 1266371488 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result Bucket Mapjoin Context: + Alias Bucket Base File Name Mapping: + b {srcbucket20.txt=[srcbucket20.txt], srcbucket21.txt=[srcbucket21.txt]} Alias Bucket File Name Mapping: b {file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt]} Needs Tagging: false @@ -223,7 +225,7 @@ serialization.ddl struct srcbucket_mapjoin { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266348512 + transient_lastDdlTime 1266371486 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -240,7 +242,7 @@ serialization.ddl struct srcbucket_mapjoin { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266348512 + transient_lastDdlTime 1266371486 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: srcbucket_mapjoin name: srcbucket_mapjoin @@ -252,14 +254,14 @@ Move Operator files: hdfs directory: true - source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-28-34_768_5027569884636052319/10002 - destination: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-28-34_768_5027569884636052319/10000 + source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_17-51-28_790_5707462252972969032/10002 + destination: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_17-51-28_790_5707462252972969032/10000 Stage: Stage-0 Move Operator tables: replace: true - source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-28-34_768_5027569884636052319/10000 + source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_17-51-28_790_5707462252972969032/10000 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -274,15 +276,15 @@ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266348514 + transient_lastDdlTime 1266371488 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result - tmp directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-28-34_768_5027569884636052319/10001 + tmp directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_17-51-28_790_5707462252972969032/10001 Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: - file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-28-34_768_5027569884636052319/10002 + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_17-51-28_790_5707462252972969032/10002 Reduce Output Operator sort order: Map-reduce partition columns: @@ -298,9 +300,9 @@ type: string Needs Tagging: false Path -> Alias: - file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-28-34_768_5027569884636052319/10002 [file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-28-34_768_5027569884636052319/10002] + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_17-51-28_790_5707462252972969032/10002 [file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_17-51-28_790_5707462252972969032/10002] Path -> Partition: - file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-28-34_768_5027569884636052319/10002 + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_17-51-28_790_5707462252972969032/10002 Partition base file name: 10002 input format: org.apache.hadoop.mapred.TextInputFormat @@ -316,7 +318,7 @@ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266348514 + transient_lastDdlTime 1266371488 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -332,7 +334,7 @@ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266348514 + transient_lastDdlTime 1266371488 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result name: bucketmapjoin_tmp_result @@ -341,7 +343,7 @@ File Output Operator compressed: false GlobalTableId: 0 - directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-28-34_768_5027569884636052319/10000 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_17-51-28_790_5707462252972969032/10000 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -356,7 +358,7 @@ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266348514 + transient_lastDdlTime 1266371488 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result @@ -378,11 +380,11 @@ PREHOOK: query: select count(1) from bucketmapjoin_tmp_result PREHOOK: type: QUERY PREHOOK: Input: default@bucketmapjoin_tmp_result -PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-28-45_494_716972105236992029/10000 +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_17-51-40_746_6816732761487407360/10000 POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result POSTHOOK: type: QUERY POSTHOOK: Input: default@bucketmapjoin_tmp_result -POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-28-45_494_716972105236992029/10000 +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_17-51-40_746_6816732761487407360/10000 464 PREHOOK: query: explain extended insert overwrite table bucketmapjoin_tmp_result @@ -446,7 +448,7 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-28-51_003_6731580911985650125/10002 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_17-51-51_982_513379661305222088/10002 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -461,7 +463,7 @@ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266348514 + transient_lastDdlTime 1266371488 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result Local Work: @@ -507,7 +509,7 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-28-51_003_6731580911985650125/10002 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_17-51-51_982_513379661305222088/10002 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -522,9 +524,14 @@ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266348514 + transient_lastDdlTime 1266371488 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result + Bucket Mapjoin Context: + Alias Bucket Base File Name Mapping: + a {srcbucket20.txt=[srcbucket20.txt], srcbucket21.txt=[srcbucket21.txt]} + Alias Bucket File Name Mapping: + a {file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt]} Needs Tagging: false Path -> Alias: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin [b] @@ -546,7 +553,7 @@ serialization.ddl struct srcbucket_mapjoin { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266348512 + transient_lastDdlTime 1266371486 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -563,7 +570,7 @@ serialization.ddl struct srcbucket_mapjoin { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266348512 + transient_lastDdlTime 1266371486 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: srcbucket_mapjoin name: srcbucket_mapjoin @@ -575,14 +582,14 @@ Move Operator files: hdfs directory: true - source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-28-51_003_6731580911985650125/10002 - destination: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-28-51_003_6731580911985650125/10000 + source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_17-51-51_982_513379661305222088/10002 + destination: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_17-51-51_982_513379661305222088/10000 Stage: Stage-0 Move Operator tables: replace: true - source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-28-51_003_6731580911985650125/10000 + source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_17-51-51_982_513379661305222088/10000 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -597,15 +604,15 @@ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266348514 + transient_lastDdlTime 1266371488 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result - tmp directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-28-51_003_6731580911985650125/10001 + tmp directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_17-51-51_982_513379661305222088/10001 Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: - file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-28-51_003_6731580911985650125/10002 + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_17-51-51_982_513379661305222088/10002 Reduce Output Operator sort order: Map-reduce partition columns: @@ -621,9 +628,9 @@ type: string Needs Tagging: false Path -> Alias: - file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-28-51_003_6731580911985650125/10002 [file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-28-51_003_6731580911985650125/10002] + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_17-51-51_982_513379661305222088/10002 [file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_17-51-51_982_513379661305222088/10002] Path -> Partition: - file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-28-51_003_6731580911985650125/10002 + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_17-51-51_982_513379661305222088/10002 Partition base file name: 10002 input format: org.apache.hadoop.mapred.TextInputFormat @@ -639,7 +646,7 @@ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266348514 + transient_lastDdlTime 1266371488 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -655,7 +662,7 @@ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266348514 + transient_lastDdlTime 1266371488 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result name: bucketmapjoin_tmp_result @@ -664,7 +671,7 @@ File Output Operator compressed: false GlobalTableId: 0 - directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-28-51_003_6731580911985650125/10000 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_17-51-51_982_513379661305222088/10000 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -679,7 +686,7 @@ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266348514 + transient_lastDdlTime 1266371488 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result @@ -701,11 +708,11 @@ PREHOOK: query: select count(1) from bucketmapjoin_tmp_result PREHOOK: type: QUERY PREHOOK: Input: default@bucketmapjoin_tmp_result -PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-29-00_791_6350546530170724014/10000 +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_17-52-03_719_5734391229034774244/10000 POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result POSTHOOK: type: QUERY POSTHOOK: Input: default@bucketmapjoin_tmp_result -POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_11-29-00_791_6350546530170724014/10000 +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_17-52-03_719_5734391229034774244/10000 464 PREHOOK: query: drop table bucketmapjoin_tmp_result PREHOOK: type: DROPTABLE Index: ql/src/test/results/clientpositive/bucketmapjoin5.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketmapjoin5.q.out (revision 0) +++ ql/src/test/results/clientpositive/bucketmapjoin5.q.out (revision 0) @@ -0,0 +1,868 @@ +PREHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@srcbucket_mapjoin_part +PREHOOK: query: load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-09 +PREHOOK: query: load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-09 +PREHOOK: query: load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-09 +PREHOOK: query: load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-09 +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +PREHOOK: query: load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +PREHOOK: query: load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +PREHOOK: query: load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-09 +PREHOOK: query: load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-09 +PREHOOK: query: create table bucketmapjoin_tmp_result (key string , value1 string, value2 string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table bucketmapjoin_tmp_result (key string , value1 string, value2 string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@bucketmapjoin_tmp_result +PREHOOK: query: explain extended +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF srcbucket_mapjoin a) (TOK_TABREF srcbucket_mapjoin_part b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB bucketmapjoin_tmp_result)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-4 depends on stages: Stage-1 , consists of Stage-3, Stage-2 + Stage-3 + Stage-0 depends on stages: Stage-3, Stage-2 + Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + b + TableScan + alias: b + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col1, _col3 + Position of Big Table: 1 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col3 + type: string + outputColumnNames: _col0, _col1, _col3 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col3 + type: string + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_19-25-06_301_6910732375780441634/10002 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.types string:string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + name bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266463506 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: bucketmapjoin_tmp_result + Local Work: + Map Reduce Local Work + Alias -> Map Local Tables: + a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + a + TableScan + alias: a + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col1, _col3 + Position of Big Table: 1 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col3 + type: string + outputColumnNames: _col0, _col1, _col3 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col3 + type: string + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_19-25-06_301_6910732375780441634/10002 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.types string:string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + name bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266463506 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: bucketmapjoin_tmp_result + Bucket Mapjoin Context: + Alias Bucket Base File Name Mapping: + a {srcbucket20.txt=[srcbucket20.txt], srcbucket21.txt=[srcbucket21.txt], srcbucket22.txt=[srcbucket20.txt], srcbucket23.txt=[srcbucket21.txt], ds=2008-04-09/srcbucket20.txt=[srcbucket20.txt], ds=2008-04-09/srcbucket21.txt=[srcbucket21.txt], ds=2008-04-09/srcbucket22.txt=[srcbucket20.txt], ds=2008-04-09/srcbucket23.txt=[srcbucket21.txt]} + Alias Bucket File Name Mapping: + a {file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket20.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket21.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket22.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket23.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09/srcbucket20.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09/srcbucket21.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09/srcbucket22.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09/srcbucket23.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt]} + Needs Tagging: false + Path -> Alias: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08 [b] + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09 [b] + Path -> Partition: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08 + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.types int:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part + name srcbucket_mapjoin_part + partition_columns ds + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266463504 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.types int:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part + name srcbucket_mapjoin_part + partition_columns ds + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266463504 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: srcbucket_mapjoin_part + name: srcbucket_mapjoin_part + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09 + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.types int:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part + name srcbucket_mapjoin_part + partition_columns ds + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266463504 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.types int:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part + name srcbucket_mapjoin_part + partition_columns ds + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266463504 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: srcbucket_mapjoin_part + name: srcbucket_mapjoin_part + + Stage: Stage-4 + Conditional Operator + + Stage: Stage-3 + Move Operator + files: + hdfs directory: true + source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_19-25-06_301_6910732375780441634/10002 + destination: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_19-25-06_301_6910732375780441634/10000 + + Stage: Stage-0 + Move Operator + tables: + replace: true + source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_19-25-06_301_6910732375780441634/10000 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.types string:string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + name bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266463506 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: bucketmapjoin_tmp_result + tmp directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_19-25-06_301_6910732375780441634/10001 + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_19-25-06_301_6910732375780441634/10002 + Reduce Output Operator + sort order: + Map-reduce partition columns: + expr: rand() + type: double + tag: -1 + value expressions: + expr: key + type: string + expr: value1 + type: string + expr: value2 + type: string + Needs Tagging: false + Path -> Alias: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_19-25-06_301_6910732375780441634/10002 [file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_19-25-06_301_6910732375780441634/10002] + Path -> Partition: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_19-25-06_301_6910732375780441634/10002 + Partition + base file name: 10002 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.types string:string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + name bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266463506 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.types string:string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + name bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266463506 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: bucketmapjoin_tmp_result + name: bucketmapjoin_tmp_result + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_19-25-06_301_6910732375780441634/10000 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.types string:string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + name bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266463506 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: bucketmapjoin_tmp_result + + +PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-09 +PREHOOK: Input: default@srcbucket_mapjoin +PREHOOK: Output: default@bucketmapjoin_tmp_result +POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-09 +POSTHOOK: Input: default@srcbucket_mapjoin +POSTHOOK: Output: default@bucketmapjoin_tmp_result +PREHOOK: query: select count(1) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_19-25-19_197_6120750452114821507/10000 +POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_19-25-19_197_6120750452114821507/10000 +928 +PREHOOK: query: explain extended +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b +on a.key=b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b +on a.key=b.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF srcbucket_mapjoin a) (TOK_TABREF srcbucket_mapjoin_part_2 b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB bucketmapjoin_tmp_result)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-4 depends on stages: Stage-1 , consists of Stage-3, Stage-2 + Stage-3 + Stage-0 depends on stages: Stage-3, Stage-2 + Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + b + TableScan + alias: b + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col1, _col3 + Position of Big Table: 1 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col3 + type: string + outputColumnNames: _col0, _col1, _col3 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col3 + type: string + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_19-25-23_507_7808943088327960371/10002 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.types string:string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + name bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266463506 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: bucketmapjoin_tmp_result + Local Work: + Map Reduce Local Work + Alias -> Map Local Tables: + a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + a + TableScan + alias: a + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col1, _col3 + Position of Big Table: 1 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col3 + type: string + outputColumnNames: _col0, _col1, _col3 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col3 + type: string + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_19-25-23_507_7808943088327960371/10002 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.types string:string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + name bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266463506 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: bucketmapjoin_tmp_result + Bucket Mapjoin Context: + Alias Bucket Base File Name Mapping: + a {srcbucket22.txt=[srcbucket20.txt], srcbucket23.txt=[srcbucket21.txt], ds=2008-04-09/srcbucket22.txt=[srcbucket20.txt], ds=2008-04-09/srcbucket23.txt=[srcbucket21.txt]} + Alias Bucket File Name Mapping: + a {file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket22.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket23.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-09/srcbucket22.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-09/srcbucket23.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt]} + Needs Tagging: false + Path -> Alias: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08 [b] + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-09 [b] + Path -> Partition: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08 + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.types int:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2 + name srcbucket_mapjoin_part_2 + partition_columns ds + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266463505 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.types int:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2 + name srcbucket_mapjoin_part_2 + partition_columns ds + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266463505 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: srcbucket_mapjoin_part_2 + name: srcbucket_mapjoin_part_2 + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-09 + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.types int:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2 + name srcbucket_mapjoin_part_2 + partition_columns ds + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266463505 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.types int:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2 + name srcbucket_mapjoin_part_2 + partition_columns ds + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266463505 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: srcbucket_mapjoin_part_2 + name: srcbucket_mapjoin_part_2 + + Stage: Stage-4 + Conditional Operator + + Stage: Stage-3 + Move Operator + files: + hdfs directory: true + source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_19-25-23_507_7808943088327960371/10002 + destination: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_19-25-23_507_7808943088327960371/10000 + + Stage: Stage-0 + Move Operator + tables: + replace: true + source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_19-25-23_507_7808943088327960371/10000 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.types string:string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + name bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266463506 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: bucketmapjoin_tmp_result + tmp directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_19-25-23_507_7808943088327960371/10001 + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_19-25-23_507_7808943088327960371/10002 + Reduce Output Operator + sort order: + Map-reduce partition columns: + expr: rand() + type: double + tag: -1 + value expressions: + expr: key + type: string + expr: value1 + type: string + expr: value2 + type: string + Needs Tagging: false + Path -> Alias: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_19-25-23_507_7808943088327960371/10002 [file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_19-25-23_507_7808943088327960371/10002] + Path -> Partition: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_19-25-23_507_7808943088327960371/10002 + Partition + base file name: 10002 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.types string:string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + name bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266463506 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.types string:string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + name bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266463506 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: bucketmapjoin_tmp_result + name: bucketmapjoin_tmp_result + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_19-25-23_507_7808943088327960371/10000 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.types string:string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + name bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266463506 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: bucketmapjoin_tmp_result + + +PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b +on a.key=b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +PREHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-09 +PREHOOK: Input: default@srcbucket_mapjoin +PREHOOK: Output: default@bucketmapjoin_tmp_result +POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b +on a.key=b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-09 +POSTHOOK: Input: default@srcbucket_mapjoin +POSTHOOK: Output: default@bucketmapjoin_tmp_result +PREHOOK: query: select count(1) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_19-25-38_412_8993913997326343300/10000 +POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_19-25-38_412_8993913997326343300/10000 +0 +PREHOOK: query: drop table bucketmapjoin_tmp_result +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table bucketmapjoin_tmp_result +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: default@bucketmapjoin_tmp_result +PREHOOK: query: drop table srcbucket_mapjoin +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table srcbucket_mapjoin +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: drop table srcbucket_mapjoin_part +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table srcbucket_mapjoin_part +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: default@srcbucket_mapjoin_part +PREHOOK: query: drop table srcbucket_mapjoin_part_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table srcbucket_mapjoin_part_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 Index: ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out (revision 0) +++ ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out (revision 0) @@ -0,0 +1,374 @@ +PREHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 3 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 3 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@srcbucket_mapjoin_part +PREHOOK: query: load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: create table bucketmapjoin_tmp_result (key string , value1 string, value2 string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table bucketmapjoin_tmp_result (key string , value1 string, value2 string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@bucketmapjoin_tmp_result +PREHOOK: query: explain extended +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key where b.ds="2008-04-08" +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key where b.ds="2008-04-08" +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF srcbucket_mapjoin a) (TOK_TABREF srcbucket_mapjoin_part b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB bucketmapjoin_tmp_result)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL b) ds) "2008-04-08")))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-4 depends on stages: Stage-1 , consists of Stage-3, Stage-2 + Stage-3 + Stage-0 depends on stages: Stage-3, Stage-2 + Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a + TableScan + alias: a + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} {ds} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col1, _col3, _col4 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + outputColumnNames: _col0, _col1, _col3, _col4 + Filter Operator + isSamplingPred: false + predicate: + expr: (_col4 = '2008-04-08') + type: boolean + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col3 + type: string + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_18-32-47_561_7700459741942400130/10002 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.types string:string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + name bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266373967 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: bucketmapjoin_tmp_result + Local Work: + Map Reduce Local Work + Alias -> Map Local Tables: + b + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + b + TableScan + alias: b + Filter Operator + isSamplingPred: false + predicate: + expr: (ds = '2008-04-08') + type: boolean + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} {ds} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col1, _col3, _col4 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + outputColumnNames: _col0, _col1, _col3, _col4 + Filter Operator + isSamplingPred: false + predicate: + expr: (_col4 = '2008-04-08') + type: boolean + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col3 + type: string + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_18-32-47_561_7700459741942400130/10002 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.types string:string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + name bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266373967 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: bucketmapjoin_tmp_result + Needs Tagging: false + Path -> Alias: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin [a] + Path -> Partition: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin + Partition + base file name: srcbucket_mapjoin + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.types int:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin + name srcbucket_mapjoin + serialization.ddl struct srcbucket_mapjoin { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266373965 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.types int:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin + name srcbucket_mapjoin + serialization.ddl struct srcbucket_mapjoin { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266373965 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: srcbucket_mapjoin + name: srcbucket_mapjoin + + Stage: Stage-4 + Conditional Operator + + Stage: Stage-3 + Move Operator + files: + hdfs directory: true + source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_18-32-47_561_7700459741942400130/10002 + destination: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_18-32-47_561_7700459741942400130/10000 + + Stage: Stage-0 + Move Operator + tables: + replace: true + source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_18-32-47_561_7700459741942400130/10000 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.types string:string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + name bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266373967 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: bucketmapjoin_tmp_result + tmp directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_18-32-47_561_7700459741942400130/10001 + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_18-32-47_561_7700459741942400130/10002 + Reduce Output Operator + sort order: + Map-reduce partition columns: + expr: rand() + type: double + tag: -1 + value expressions: + expr: key + type: string + expr: value1 + type: string + expr: value2 + type: string + Needs Tagging: false + Path -> Alias: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_18-32-47_561_7700459741942400130/10002 [file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_18-32-47_561_7700459741942400130/10002] + Path -> Partition: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_18-32-47_561_7700459741942400130/10002 + Partition + base file name: 10002 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.types string:string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + name bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266373967 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.types string:string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + name bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266373967 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: bucketmapjoin_tmp_result + name: bucketmapjoin_tmp_result + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-16_18-32-47_561_7700459741942400130/10000 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.types string:string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + name bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266373967 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: bucketmapjoin_tmp_result + + +PREHOOK: query: drop table bucketmapjoin_tmp_result +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table bucketmapjoin_tmp_result +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: default@bucketmapjoin_tmp_result +PREHOOK: query: drop table srcbucket_mapjoin +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table srcbucket_mapjoin +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: drop table srcbucket_mapjoin_part +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table srcbucket_mapjoin_part +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: default@srcbucket_mapjoin_part Index: ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out (revision 0) +++ ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out (revision 0) @@ -0,0 +1,360 @@ +PREHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +PREHOOK: query: load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +PREHOOK: query: load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +PREHOOK: query: load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-09 +PREHOOK: query: load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-09 +PREHOOK: query: create table bucketmapjoin_tmp_result (key string , value1 string, value2 string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table bucketmapjoin_tmp_result (key string , value1 string, value2 string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@bucketmapjoin_tmp_result +PREHOOK: query: explain extended +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b +on a.key=b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b +on a.key=b.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF srcbucket_mapjoin a) (TOK_TABREF srcbucket_mapjoin_part_2 b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB bucketmapjoin_tmp_result)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-4 depends on stages: Stage-1 , consists of Stage-3, Stage-2 + Stage-3 + Stage-0 depends on stages: Stage-3, Stage-2 + Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a + TableScan + alias: a + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col1, _col3 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col3 + type: string + outputColumnNames: _col0, _col1, _col3 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col3 + type: string + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_19-12-35_239_9032004865602581317/10002 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.types string:string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + name bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266462755 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: bucketmapjoin_tmp_result + Local Work: + Map Reduce Local Work + Alias -> Map Local Tables: + b + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + b + TableScan + alias: b + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col1, _col3 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col3 + type: string + outputColumnNames: _col0, _col1, _col3 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col3 + type: string + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_19-12-35_239_9032004865602581317/10002 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.types string:string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + name bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266462755 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: bucketmapjoin_tmp_result + Needs Tagging: false + Path -> Alias: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin [a] + Path -> Partition: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin + Partition + base file name: srcbucket_mapjoin + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.types int:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin + name srcbucket_mapjoin + serialization.ddl struct srcbucket_mapjoin { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266462753 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.types int:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin + name srcbucket_mapjoin + serialization.ddl struct srcbucket_mapjoin { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266462753 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: srcbucket_mapjoin + name: srcbucket_mapjoin + + Stage: Stage-4 + Conditional Operator + + Stage: Stage-3 + Move Operator + files: + hdfs directory: true + source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_19-12-35_239_9032004865602581317/10002 + destination: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_19-12-35_239_9032004865602581317/10000 + + Stage: Stage-0 + Move Operator + tables: + replace: true + source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_19-12-35_239_9032004865602581317/10000 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.types string:string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + name bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266462755 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: bucketmapjoin_tmp_result + tmp directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_19-12-35_239_9032004865602581317/10001 + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_19-12-35_239_9032004865602581317/10002 + Reduce Output Operator + sort order: + Map-reduce partition columns: + expr: rand() + type: double + tag: -1 + value expressions: + expr: key + type: string + expr: value1 + type: string + expr: value2 + type: string + Needs Tagging: false + Path -> Alias: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_19-12-35_239_9032004865602581317/10002 [file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_19-12-35_239_9032004865602581317/10002] + Path -> Partition: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_19-12-35_239_9032004865602581317/10002 + Partition + base file name: 10002 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.types string:string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + name bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266462755 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.types string:string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + name bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266462755 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: bucketmapjoin_tmp_result + name: bucketmapjoin_tmp_result + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-17_19-12-35_239_9032004865602581317/10000 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.types string:string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + name bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266462755 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: bucketmapjoin_tmp_result + + +PREHOOK: query: drop table bucketmapjoin_tmp_result +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table bucketmapjoin_tmp_result +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: default@bucketmapjoin_tmp_result +PREHOOK: query: drop table srcbucket_mapjoin +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table srcbucket_mapjoin +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: drop table srcbucket_mapjoin_part_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table srcbucket_mapjoin_part_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: default@srcbucket_mapjoin_part_2