Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ExecMapper.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/ExecMapper.java (revision 910755) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExecMapper.java (working copy) @@ -254,9 +254,6 @@ String currentInputFile = HiveConf.getVar(jc, HiveConf.ConfVars.HADOOPMAPFILENAME); BucketMapJoinContext bucketMatcherCxt = this.localWork.getBucketMapjoinContext(); Class bucketMatcherCls = bucketMatcherCxt.getBucketMatcherClass(); - if(bucketMatcherCls == null) { - bucketMatcherCls = org.apache.hadoop.hive.ql.exec.DefaultBucketMatcher.class; - } BucketMatcher bucketMatcher = (BucketMatcher) ReflectionUtils.newInstance(bucketMatcherCls, null); bucketMatcher.setAliasBucketFileNameMapping(bucketMatcherCxt.getAliasBucketFileNameMapping()); List aliasFiles = bucketMatcher.getAliasBucketFiles(currentInputFile, Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketMapJoinOptimizer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketMapJoinOptimizer.java (revision 910755) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketMapJoinOptimizer.java (working copy) @@ -20,6 +20,7 @@ import java.io.IOException; import java.io.Serializable; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; import java.util.HashSet; import java.util.Iterator; @@ -28,6 +29,7 @@ import java.util.Map; import java.util.Set; import java.util.Stack; +import java.util.Map.Entry; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -174,15 +176,19 @@ } MapJoinDesc mjDecs = mapJoinOp.getConf(); - LinkedHashMap aliasToBucketNumber = new LinkedHashMap(); - LinkedHashMap> aliasToBucketFileNames = new LinkedHashMap>(); + LinkedHashMap aliasToBucketNumberMapping = new LinkedHashMap(); + LinkedHashMap> aliasToBucketFileNamesMapping = new LinkedHashMap>(); // right now this code does not work with "a join b on a.key = b.key and // a.ds = b.ds", where ds is a partition column. It only works with joins // with only one partition presents in each join source tables. Map> topOps = this.pGraphContext.getTopOps(); Map topToTable = this.pGraphContext.getTopToTable(); - List bucketNumbers = new ArrayList(); + // (partition to bucket file names) and (partition to bucket number) for + // the big table; + LinkedHashMap> bigTblPartsToBucketFileNames = new LinkedHashMap>(); + LinkedHashMap bigTblPartsToBucketNumber = new LinkedHashMap(); + for (int index = 0; index < joinAliases.size(); index++) { String alias = joinAliases.get(index); TableScanOperator tso = (TableScanOperator) topOps.get(alias); @@ -200,46 +206,69 @@ } int partNumber = prunedParts.getConfirmedPartns().size() + prunedParts.getUnknownPartns().size(); - if(partNumber > 1) - return null; - - Partition part = null; - Iterator iter = prunedParts.getConfirmedPartns() - .iterator(); - while (iter.hasNext()) - part = iter.next(); - if (part == null) { + + if (partNumber > 1) { + // only allow one partition for small tables + if(alias != baseBigAlias) { + return null; + } + // here is the big table,and we get more than one partitions. + // construct a mapping of (Partition->bucket file names) and + // (Partition -> bucket number) + Iterator iter = prunedParts.getConfirmedPartns() + .iterator(); + while (iter.hasNext()) { + Partition p = iter.next(); + if (!checkBucketColumns(p.getBucketCols(), mjDecs, index)) { + return null; + } + List fileNames = getOnePartitionBucketFileNames(p); + bigTblPartsToBucketFileNames.put(p, fileNames); + bigTblPartsToBucketNumber.put(p, p.getBucketCount()); + } iter = prunedParts.getUnknownPartns().iterator(); - while (iter.hasNext()) - part = iter.next(); - } - - assert part != null; - - if (!checkBucketColumns(part.getBucketCols(), mjDecs, index)) - return null; - - Integer num = new Integer(part.getBucketCount()); - aliasToBucketNumber.put(alias, num); - List fileNames = new ArrayList(); - try { - FileSystem fs = FileSystem.get(this.pGraphContext.getConf()); - FileStatus[] files = fs.listStatus(new Path(part.getDataLocation() - .toString())); - if (files != null) { - for (FileStatus file : files) { - fileNames.add(file.getPath().toString()); + while (iter.hasNext()) { + Partition p = iter.next(); + if (!checkBucketColumns(p.getBucketCols(), mjDecs, index)) { + return null; } + List fileNames = getOnePartitionBucketFileNames(p); + bigTblPartsToBucketFileNames.put(p, fileNames); + bigTblPartsToBucketNumber.put(p, p.getBucketCount()); } - } catch (IOException e) { - throw new SemanticException(e); + // If there are more than one partition for the big + // table,aliasToBucketFileNamesMapping and partsToBucketNumber will + // not contain mappings for the big table. Instead, the mappings are + // contained in bigTblPartsToBucketFileNames and + // bigTblPartsToBucketNumber + + } else { + Partition part = null; + Iterator iter = prunedParts.getConfirmedPartns() + .iterator(); + part = iter.next(); + if (part == null) { + iter = prunedParts.getUnknownPartns().iterator(); + part = iter.next(); + } + assert part != null; + Integer num = new Integer(part.getBucketCount()); + aliasToBucketNumberMapping.put(alias, num); + if (!checkBucketColumns(part.getBucketCols(), mjDecs, index)) { + return null; + } + List fileNames = getOnePartitionBucketFileNames(part); + aliasToBucketFileNamesMapping.put(alias, fileNames); + if (alias == baseBigAlias) { + bigTblPartsToBucketFileNames.put(part, fileNames); + bigTblPartsToBucketNumber.put(part, num); + } } - aliasToBucketFileNames.put(alias, fileNames); } else { if (!checkBucketColumns(tbl.getBucketCols(), mjDecs, index)) return null; Integer num = new Integer(tbl.getNumBuckets()); - aliasToBucketNumber.put(alias, num); + aliasToBucketNumberMapping.put(alias, num); List fileNames = new ArrayList(); try { FileSystem fs = FileSystem.get(this.pGraphContext.getConf()); @@ -252,61 +281,146 @@ } catch (IOException e) { throw new SemanticException(e); } - aliasToBucketFileNames.put(alias, fileNames); + aliasToBucketFileNamesMapping.put(alias, fileNames); } } // All tables or partitions are bucketed, and their bucket number is // stored in 'bucketNumbers', we need to check if the number of buckets in // the big table can be divided by no of buckets in small tables. - int bucketNoInBigTbl = aliasToBucketNumber.get(baseBigAlias).intValue(); - Iterator iter = aliasToBucketNumber.values().iterator(); - while(iter.hasNext()) { - int nxt = iter.next().intValue(); - boolean ok = (nxt >= bucketNoInBigTbl) ? nxt % bucketNoInBigTbl == 0 - : bucketNoInBigTbl % nxt == 0; - if(!ok) + if (bigTblPartsToBucketNumber.size() > 0) { + Iterator> bigTblPartToBucketNumber = bigTblPartsToBucketNumber + .entrySet().iterator(); + while (bigTblPartToBucketNumber.hasNext()) { + int bucketNumberInPart = bigTblPartToBucketNumber.next().getValue(); + if (!checkBucketNumberAgainstBigTable(aliasToBucketNumberMapping, + bucketNumberInPart)) { + return null; + } + } + } else { + int bucketNoInBigTbl = aliasToBucketNumberMapping.get(baseBigAlias).intValue(); + if (!checkBucketNumberAgainstBigTable(aliasToBucketNumberMapping, + bucketNoInBigTbl)) { return null; + } } + MapJoinDesc desc = mapJoinOp.getConf(); LinkedHashMap>> aliasBucketFileNameMapping = new LinkedHashMap>>(); - int bigTblBucketNum = aliasToBucketNumber.get(baseBigAlias); - Collections.sort(aliasToBucketFileNames.get(baseBigAlias)); + //sort bucket names for the big table + if(bigTblPartsToBucketNumber.size() > 0) { + Collection> bucketNamesAllParts = bigTblPartsToBucketFileNames.values(); + for(List partBucketNames : bucketNamesAllParts) { + Collections.sort(partBucketNames); + } + } else { + Collections.sort(aliasToBucketFileNamesMapping.get(baseBigAlias)); + } + + // go through all small tables and get the mapping from bucket file name + // in the big table to bucket file names in small tables. for (int j = 0; j < joinAliases.size(); j++) { String alias = joinAliases.get(j); if(alias.equals(baseBigAlias)) continue; - Collections.sort(aliasToBucketFileNames.get(alias)); + Collections.sort(aliasToBucketFileNamesMapping.get(alias)); LinkedHashMap> mapping = new LinkedHashMap>(); aliasBucketFileNameMapping.put(alias, mapping); - for(String inputBigTBLBucket : aliasToBucketFileNames.get(baseBigAlias)) { - int smallTblBucketNum = aliasToBucketNumber.get(alias); - ArrayList resultFileNames = new ArrayList(); - int index = aliasToBucketFileNames.get(baseBigAlias).indexOf(inputBigTBLBucket); - if (bigTblBucketNum >= smallTblBucketNum) { - int temp = bigTblBucketNum / smallTblBucketNum; - int toAddSmallIndex = index/temp; - if(toAddSmallIndex < aliasToBucketFileNames.get(alias).size()) { - resultFileNames.add(aliasToBucketFileNames.get(alias).get(toAddSmallIndex)); - } - } else { - int jump = smallTblBucketNum / bigTblBucketNum; - for (int i = index; i < aliasToBucketFileNames.get(alias).size(); i = i + jump) { - if(i <= aliasToBucketFileNames.get(alias).size()) { - resultFileNames.add(aliasToBucketFileNames.get(alias).get(i)); - } - } + + // for each bucket file in big table, get the corresponding bucket file + // name in the small table. + if (bigTblPartsToBucketNumber.size() > 0) { + //more than 1 partition in the big table, do the mapping for each partition + Iterator>> bigTblPartToBucketNames = bigTblPartsToBucketFileNames + .entrySet().iterator(); + Iterator> bigTblPartToBucketNum = bigTblPartsToBucketNumber + .entrySet().iterator(); + while (bigTblPartToBucketNames.hasNext()) { + assert bigTblPartToBucketNum.hasNext(); + int bigTblBucketNum = bigTblPartToBucketNum.next().getValue().intValue(); + List bigTblBucketNameList = bigTblPartToBucketNames.next().getValue(); + fillMapping(baseBigAlias, aliasToBucketNumberMapping, + aliasToBucketFileNamesMapping, alias, mapping, bigTblBucketNum, + bigTblBucketNameList); } - mapping.put(inputBigTBLBucket, resultFileNames); + } else { + List bigTblBucketNameList = aliasToBucketFileNamesMapping.get(baseBigAlias); + int bigTblBucketNum = aliasToBucketNumberMapping.get(baseBigAlias); + fillMapping(baseBigAlias, aliasToBucketNumberMapping, + aliasToBucketFileNamesMapping, alias, mapping, bigTblBucketNum, + bigTblBucketNameList); } } desc.setAliasBucketFileNameMapping(aliasBucketFileNameMapping); desc.setBigTableAlias(baseBigAlias); return null; } + + private void fillMapping(String baseBigAlias, + LinkedHashMap aliasToBucketNumberMapping, + LinkedHashMap> aliasToBucketFileNamesMapping, + String alias, LinkedHashMap> mapping, + int bigTblBucketNum, List bigTblBucketNameList) { + for (int index = 0; index < bigTblBucketNameList.size(); index++) { + String inputBigTBLBucket = bigTblBucketNameList.get(index); + int smallTblBucketNum = aliasToBucketNumberMapping.get(alias); + ArrayList resultFileNames = new ArrayList(); + if (bigTblBucketNum >= smallTblBucketNum) { + // if the big table has more buckets than the current small table, + // use "MOD" to get small table bucket names. For example, if the big + // table has 4 buckets and the small table has 2 buckets, then the + // mapping should be 0->0, 1->1, 2->0, 3->1. + int toAddSmallIndex = index % smallTblBucketNum; + if(toAddSmallIndex < aliasToBucketFileNamesMapping.get(alias).size()) { + resultFileNames.add(aliasToBucketFileNamesMapping.get(alias).get(toAddSmallIndex)); + } + } else { + int jump = smallTblBucketNum / bigTblBucketNum; + for (int i = index; i < aliasToBucketFileNamesMapping.get(alias).size(); i = i + jump) { + if(i <= aliasToBucketFileNamesMapping.get(alias).size()) { + resultFileNames.add(aliasToBucketFileNamesMapping.get(alias).get(i)); + } + } + } + mapping.put(inputBigTBLBucket, resultFileNames); + } + } + + private boolean checkBucketNumberAgainstBigTable( + LinkedHashMap aliasToBucketNumber, + int bucketNumberInPart) { + Iterator iter = aliasToBucketNumber.values().iterator(); + while(iter.hasNext()) { + int nxt = iter.next().intValue(); + boolean ok = (nxt >= bucketNumberInPart) ? nxt % bucketNumberInPart == 0 + : bucketNumberInPart % nxt == 0; + if(!ok) + return false; + } + return true; + } + + private List getOnePartitionBucketFileNames(Partition part) + throws SemanticException { + List fileNames = new ArrayList(); + try { + FileSystem fs = FileSystem.get(this.pGraphContext.getConf()); + FileStatus[] files = fs.listStatus(new Path(part.getDataLocation() + .toString())); + if (files != null) { + for (FileStatus file : files) { + fileNames.add(file.getPath().toString()); + } + } + } catch (IOException e) { + throw new SemanticException(e); + } + return fileNames; + } private boolean checkBucketColumns(List bucketColumns, MapJoinDesc mjDesc, int index) { List keys = mjDesc.getKeys().get((byte)index); Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (revision 910755) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (working copy) @@ -227,16 +227,23 @@ private static void setupBucketMapJoinInfo(MapredWork plan, MapJoinOperator currMapJoinOp) { - MapredLocalWork localPlan = plan.getMapLocalWork(); - if (localPlan != null && currMapJoinOp != null) { + if (currMapJoinOp != null) { LinkedHashMap>> aliasBucketFileNameMapping = currMapJoinOp.getConf().getAliasBucketFileNameMapping(); if(aliasBucketFileNameMapping!= null) { + MapredLocalWork localPlan = plan.getMapLocalWork(); + if (localPlan == null) { + localPlan = new MapredLocalWork( + new LinkedHashMap>(), + new LinkedHashMap()); + plan.setMapLocalWork(localPlan); + } BucketMapJoinContext bucketMJCxt = new BucketMapJoinContext(); localPlan.setBucketMapjoinContext(bucketMJCxt); bucketMJCxt.setAliasBucketFileNameMapping(aliasBucketFileNameMapping); localPlan.setInputFileChangeSensitive(true); bucketMJCxt.setMapJoinBigTableAlias(currMapJoinOp.getConf().getBigTableAlias()); + bucketMJCxt.setBucketMatcherClass(org.apache.hadoop.hive.ql.exec.DefaultBucketMatcher.class); } } } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java (revision 910755) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java (working copy) @@ -18,10 +18,15 @@ package org.apache.hadoop.hive.ql.plan; +import java.io.File; import java.io.Serializable; +import java.net.URI; import java.util.ArrayList; +import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; +import java.util.Map; +import java.util.Map.Entry; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.BucketMatcher; @@ -84,6 +89,12 @@ this.inputFileChangeSensitive = inputFileChangeSensitive; } + public void deriveExplainAttributes() { + if (bucketMapjoinContext != null) { + bucketMapjoinContext.deriveBucketMapJoinMapping(); + } + } + @Explain(displayName = "Bucket Mapjoin Context", normalExplain = false) public BucketMapJoinContext getBucketMapjoinContext() { return bucketMapjoinContext; @@ -101,11 +112,64 @@ private LinkedHashMap>> aliasBucketFileNameMapping; private String mapJoinBigTableAlias; private Class bucketMatcherClass; + + private LinkedHashMap>> aliasBucketBaseFileNameMapping; public void setMapJoinBigTableAlias(String bigTableAlias) { this.mapJoinBigTableAlias = bigTableAlias; } + + public void deriveBucketMapJoinMapping() { + if (aliasBucketFileNameMapping != null) { + Iterator>>> iter = + aliasBucketFileNameMapping.entrySet().iterator(); + aliasBucketBaseFileNameMapping = new LinkedHashMap>>(); + + while (iter.hasNext()) { + Entry>> old = iter.next(); + + LinkedHashMap> newBucketBaseFileNameMapping = new LinkedHashMap>(); + Iterator>> oldAliasFileNameMappingIter = old.getValue().entrySet().iterator(); + while (oldAliasFileNameMappingIter.hasNext()) { + //For a give table and its bucket full file path list, only keep the base file name (remove file path etc). + //And put the new list into the new mapping. + Entry> oldTableBucketFileNames = oldAliasFileNameMappingIter.next(); + ArrayList oldTableBucketNames = oldTableBucketFileNames.getValue(); + ArrayList newTableBucketFileBaseName = new ArrayList (oldTableBucketNames.size()); + //for each bucket file, only keep its base files and store into a new list. + if (oldTableBucketNames != null) { + for (String bucketFName : oldTableBucketNames) { + newTableBucketFileBaseName.add(getBaseFileName(bucketFName)); + } + } + String bigTblBucketFileName = getBaseFileName(oldTableBucketFileNames.getKey()); + if(newBucketBaseFileNameMapping.containsKey(bigTblBucketFileName)) { + String fullPath = oldTableBucketFileNames.getKey(); + String dir = getBaseFileName(fullPath.substring(0, fullPath.lastIndexOf(bigTblBucketFileName))); + bigTblBucketFileName = dir + File.separator + bigTblBucketFileName; + } + //put the new mapping + newBucketBaseFileNameMapping.put(bigTblBucketFileName, newTableBucketFileBaseName); + } + String tableAlias = old.getKey(); + aliasBucketBaseFileNameMapping.put(tableAlias, newBucketBaseFileNameMapping); + } + } + } + + private String getBaseFileName (String path) { + try { + URI uri = new URI(path); + File file = new File(uri); + return file.getName(); + } catch (Exception ex) { + // This could be due to either URI syntax error or File constructor + // illegal arg; we don't really care which one it is. + return path; + } + } + public String getMapJoinBigTableAlias() { return mapJoinBigTableAlias; } @@ -123,7 +187,7 @@ public LinkedHashMap>> getAliasBucketFileNameMapping() { return aliasBucketFileNameMapping; } - + public void setAliasBucketFileNameMapping( LinkedHashMap>> aliasBucketFileNameMapping) { this.aliasBucketFileNameMapping = aliasBucketFileNameMapping; @@ -135,5 +199,15 @@ else return ""; } + + @Explain(displayName = "Alias Bucket Base File Name Mapping", normalExplain = false) + public LinkedHashMap>> getAliasBucketBaseFileNameMapping() { + return aliasBucketBaseFileNameMapping; + } + + public void setAliasBucketBaseFileNameMapping( + LinkedHashMap>> aliasBucketBaseFileNameMapping) { + this.aliasBucketBaseFileNameMapping = aliasBucketBaseFileNameMapping; + } } } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java (revision 909965) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java (working copy) @@ -270,12 +270,14 @@ * Derive additional attributes to be rendered by EXPLAIN. */ public void deriveExplainAttributes() { - if (pathToPartitionInfo == null) { - return; + if (pathToPartitionInfo != null) { + for (Map.Entry entry : pathToPartitionInfo + .entrySet()) { + entry.getValue().deriveBaseFileName(entry.getKey()); + } } - for (Map.Entry entry : pathToPartitionInfo - .entrySet()) { - entry.getValue().deriveBaseFileName(entry.getKey()); + if (mapLocalWork != null) { + mapLocalWork.deriveExplainAttributes(); } } Index: ql/src/test/queries/clientpositive/bucketmapjoin1.q =================================================================== --- ql/src/test/queries/clientpositive/bucketmapjoin1.q (revision 911664) +++ ql/src/test/queries/clientpositive/bucketmapjoin1.q (working copy) @@ -12,6 +12,8 @@ load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08'); load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08'); +create table bucketmapjoin_hash_result_1 (key bigint , value1 bigint, value2 bigint); +create table bucketmapjoin_hash_result_2 (key bigint , value1 bigint, value2 bigint); set hive.optimize.bucketmapjoin = true; create table bucketmapjoin_tmp_result (key string , value1 string, value2 string); @@ -29,7 +31,26 @@ select count(1) from bucketmapjoin_tmp_result; +insert overwrite table bucketmapjoin_hash_result_1 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result; + +set hive.optimize.bucketmapjoin = false; +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key where b.ds="2008-04-08"; +select count(1) from bucketmapjoin_tmp_result; +insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result; + + +select a.key-b.key, a.value1-b.value1, a.value2-b.value2 +from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +on a.key = b.key; + + +set hive.optimize.bucketmapjoin = true; explain extended insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(a)*/ a.key, a.value, b.value @@ -44,6 +65,26 @@ select count(1) from bucketmapjoin_tmp_result; +insert overwrite table bucketmapjoin_hash_result_1 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result; + +set hive.optimize.bucketmapjoin = false; +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key where b.ds="2008-04-08"; + +select count(1) from bucketmapjoin_tmp_result; +insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result; + +select a.key-b.key, a.value1-b.value1, a.value2-b.value2 +from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +on a.key = b.key; + + +drop table bucketmapjoin_hash_result_2; +drop table bucketmapjoin_hash_result_1; drop table bucketmapjoin_tmp_result; drop table srcbucket_mapjoin; drop table srcbucket_mapjoin_part; Index: ql/src/test/queries/clientpositive/bucketmapjoin2.q =================================================================== --- ql/src/test/queries/clientpositive/bucketmapjoin2.q (revision 911664) +++ ql/src/test/queries/clientpositive/bucketmapjoin2.q (working copy) @@ -12,11 +12,12 @@ load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08'); load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08'); +create table bucketmapjoin_hash_result_1 (key bigint , value1 bigint, value2 bigint); +create table bucketmapjoin_hash_result_2 (key bigint , value1 bigint, value2 bigint); set hive.optimize.bucketmapjoin = true; create table bucketmapjoin_tmp_result (key string , value1 string, value2 string); - explain extended insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(b)*/ a.key, a.value, b.value @@ -29,8 +30,25 @@ on a.key=b.key and b.ds="2008-04-08"; select count(1) from bucketmapjoin_tmp_result; +insert overwrite table bucketmapjoin_hash_result_1 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result; + + +set hive.optimize.bucketmapjoin = false; +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b +on a.key=b.key and b.ds="2008-04-08"; + +select count(1) from bucketmapjoin_tmp_result; +insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result; +select a.key-b.key, a.value1-b.value1, a.value2-b.value2 +from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +on a.key = b.key; +set hive.optimize.bucketmapjoin = true; explain extended insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(a)*/ a.key, a.value, b.value @@ -43,7 +61,26 @@ on a.key=b.key and b.ds="2008-04-08"; select count(1) from bucketmapjoin_tmp_result; +insert overwrite table bucketmapjoin_hash_result_1 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result; + +set hive.optimize.bucketmapjoin = false; +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b +on a.key=b.key and b.ds="2008-04-08"; + +select count(1) from bucketmapjoin_tmp_result; +insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result; + +select a.key-b.key, a.value1-b.value1, a.value2-b.value2 +from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +on a.key = b.key; + +drop table bucketmapjoin_hash_result_2; +drop table bucketmapjoin_hash_result_1; drop table bucketmapjoin_tmp_result; drop table srcbucket_mapjoin; drop table srcbucket_mapjoin_part; Index: ql/src/test/queries/clientpositive/bucketmapjoin3.q =================================================================== --- ql/src/test/queries/clientpositive/bucketmapjoin3.q (revision 911664) +++ ql/src/test/queries/clientpositive/bucketmapjoin3.q (working copy) @@ -12,6 +12,8 @@ load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08'); load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08'); +create table bucketmapjoin_hash_result_1 (key bigint , value1 bigint, value2 bigint); +create table bucketmapjoin_hash_result_2 (key bigint , value1 bigint, value2 bigint); set hive.optimize.bucketmapjoin = true; create table bucketmapjoin_tmp_result (key string , value1 string, value2 string); @@ -28,8 +30,24 @@ on a.key=b.key and b.ds="2008-04-08" and a.ds="2008-04-08"; select count(1) from bucketmapjoin_tmp_result; +insert overwrite table bucketmapjoin_hash_result_1 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result; + +set hive.optimize.bucketmapjoin = false; +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin_part_2 a join srcbucket_mapjoin_part b +on a.key=b.key and b.ds="2008-04-08" and a.ds="2008-04-08"; + +select count(1) from bucketmapjoin_tmp_result; +insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result; +select a.key-b.key, a.value1-b.value1, a.value2-b.value2 +from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +on a.key = b.key; +set hive.optimize.bucketmapjoin = true; explain extended insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(a)*/ a.key, a.value, b.value @@ -42,8 +60,25 @@ on a.key=b.key and b.ds="2008-04-08" and a.ds="2008-04-08"; select count(1) from bucketmapjoin_tmp_result; +insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result; +set hive.optimize.bucketmapjoin = false; +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin_part_2 a join srcbucket_mapjoin_part b +on a.key=b.key and b.ds="2008-04-08" and a.ds="2008-04-08"; +select count(1) from bucketmapjoin_tmp_result; +insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result; + +select a.key-b.key, a.value1-b.value1, a.value2-b.value2 +from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +on a.key = b.key; + +drop table bucketmapjoin_hash_result_2; +drop table bucketmapjoin_hash_result_1; drop table bucketmapjoin_tmp_result; drop table srcbucket_mapjoin; drop table srcbucket_mapjoin_part; Index: ql/src/test/queries/clientpositive/bucketmapjoin4.q =================================================================== --- ql/src/test/queries/clientpositive/bucketmapjoin4.q (revision 911664) +++ ql/src/test/queries/clientpositive/bucketmapjoin4.q (working copy) @@ -12,11 +12,12 @@ load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08'); load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08'); +create table bucketmapjoin_hash_result_1 (key bigint , value1 bigint, value2 bigint); +create table bucketmapjoin_hash_result_2 (key bigint , value1 bigint, value2 bigint); set hive.optimize.bucketmapjoin = true; create table bucketmapjoin_tmp_result (key string , value1 string, value2 string); - explain extended insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(b)*/ a.key, a.value, b.value @@ -29,7 +30,25 @@ on a.key=b.key; select count(1) from bucketmapjoin_tmp_result; +insert overwrite table bucketmapjoin_hash_result_1 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result; + +set hive.optimize.bucketmapjoin = false; +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin b +on a.key=b.key; + +select count(1) from bucketmapjoin_tmp_result; +insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result; + +select a.key-b.key, a.value1-b.value1, a.value2-b.value2 +from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +on a.key = b.key; + +set hive.optimize.bucketmapjoin = true; explain extended insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(a)*/ a.key, a.value, b.value @@ -42,8 +61,25 @@ on a.key=b.key; select count(1) from bucketmapjoin_tmp_result; +insert overwrite table bucketmapjoin_hash_result_1 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result; +set hive.optimize.bucketmapjoin = false; +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin b +on a.key=b.key; + +select count(1) from bucketmapjoin_tmp_result; +insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result; +select a.key-b.key, a.value1-b.value1, a.value2-b.value2 +from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +on a.key = b.key; + +drop table bucketmapjoin_hash_result_2; +drop table bucketmapjoin_hash_result_1; drop table bucketmapjoin_tmp_result; drop table srcbucket_mapjoin; drop table srcbucket_mapjoin_part; Index: ql/src/test/queries/clientpositive/bucketmapjoin5.q =================================================================== --- ql/src/test/queries/clientpositive/bucketmapjoin5.q (revision 0) +++ ql/src/test/queries/clientpositive/bucketmapjoin5.q (revision 0) @@ -0,0 +1,91 @@ +CREATE TABLE srcbucket_mapjoin(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin; +load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin; + +CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; +load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); +load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); +load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); +load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); +load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09'); +load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09'); +load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09'); +load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09'); + +CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08'); +load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08'); +load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09'); +load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09'); + +create table bucketmapjoin_hash_result_1 (key bigint , value1 bigint, value2 bigint); +create table bucketmapjoin_hash_result_2 (key bigint , value1 bigint, value2 bigint); + +set hive.optimize.bucketmapjoin = true; +create table bucketmapjoin_tmp_result (key string , value1 string, value2 string); + +explain extended +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key; + +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key; + +select count(1) from bucketmapjoin_tmp_result; +insert overwrite table bucketmapjoin_hash_result_1 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result; + +set hive.optimize.bucketmapjoin = false; +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key; + +select count(1) from bucketmapjoin_tmp_result; +insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result; + +select a.key-b.key, a.value1-b.value1, a.value2-b.value2 +from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +on a.key = b.key; + + +set hive.optimize.bucketmapjoin = true; +explain extended +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b +on a.key=b.key; + +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b +on a.key=b.key; + +select count(1) from bucketmapjoin_tmp_result; +insert overwrite table bucketmapjoin_hash_result_1 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result; + +set hive.optimize.bucketmapjoin = false; +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b +on a.key=b.key; + +select count(1) from bucketmapjoin_tmp_result; +insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result; + +select a.key-b.key, a.value1-b.value1, a.value2-b.value2 +from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +on a.key = b.key; + + +drop table bucketmapjoin_tmp_result; +drop table srcbucket_mapjoin; +drop table srcbucket_mapjoin_part; +drop table srcbucket_mapjoin_part_2; \ No newline at end of file Index: ql/src/test/queries/clientpositive/bucketmapjoin_negative.q =================================================================== --- ql/src/test/queries/clientpositive/bucketmapjoin_negative.q (revision 0) +++ ql/src/test/queries/clientpositive/bucketmapjoin_negative.q (revision 0) @@ -0,0 +1,23 @@ +CREATE TABLE srcbucket_mapjoin(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin; +load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin; + +CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 3 BUCKETS STORED AS TEXTFILE; +load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); +load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); +load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); + + + +set hive.optimize.bucketmapjoin = true; +create table bucketmapjoin_tmp_result (key string , value1 string, value2 string); + +explain extended +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key where b.ds="2008-04-08"; + +drop table bucketmapjoin_tmp_result; +drop table srcbucket_mapjoin; +drop table srcbucket_mapjoin_part; \ No newline at end of file Index: ql/src/test/queries/clientpositive/bucketmapjoin_negative2.q =================================================================== --- ql/src/test/queries/clientpositive/bucketmapjoin_negative2.q (revision 0) +++ ql/src/test/queries/clientpositive/bucketmapjoin_negative2.q (revision 0) @@ -0,0 +1,22 @@ +CREATE TABLE srcbucket_mapjoin(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin; +load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin; + +CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08'); +load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08'); +load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09'); +load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09'); + +set hive.optimize.bucketmapjoin = true; +create table bucketmapjoin_tmp_result (key string , value1 string, value2 string); + +explain extended +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b +on a.key=b.key; + +drop table bucketmapjoin_tmp_result; +drop table srcbucket_mapjoin; +drop table srcbucket_mapjoin_part_2; \ No newline at end of file Index: ql/src/test/results/clientpositive/bucketmapjoin1.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketmapjoin1.q.out (revision 911664) +++ ql/src/test/results/clientpositive/bucketmapjoin1.q.out (working copy) @@ -53,6 +53,16 @@ POSTHOOK: query: load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') POSTHOOK: type: LOAD POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +PREHOOK: query: create table bucketmapjoin_hash_result_1 (key bigint , value1 bigint, value2 bigint) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table bucketmapjoin_hash_result_1 (key bigint , value1 bigint, value2 bigint) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@bucketmapjoin_hash_result_1 +PREHOOK: query: create table bucketmapjoin_hash_result_2 (key bigint , value1 bigint, value2 bigint) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table bucketmapjoin_hash_result_2 (key bigint , value1 bigint, value2 bigint) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@bucketmapjoin_hash_result_2 PREHOOK: query: create table bucketmapjoin_tmp_result (key string , value1 string, value2 string) PREHOOK: type: CREATETABLE POSTHOOK: query: create table bucketmapjoin_tmp_result (key string , value1 string, value2 string) @@ -127,7 +137,7 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-26-32_403_4982018782937064172/10002 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-32-45_651_5720285813227132874/10002 NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -138,12 +148,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449192 + transient_lastDdlTime 1266607965 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result TotalFiles: 1 @@ -203,7 +213,7 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-26-32_403_4982018782937064172/10002 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-32-45_651_5720285813227132874/10002 NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -214,24 +224,26 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449192 + transient_lastDdlTime 1266607965 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result TotalFiles: 1 MultiFileSpray: false Bucket Mapjoin Context: + Alias Bucket Base File Name Mapping: + b {srcbucket20.txt=[srcbucket20.txt, srcbucket22.txt], srcbucket21.txt=[srcbucket21.txt, srcbucket23.txt]} Alias Bucket File Name Mapping: - b {file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt=[file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket20.txt, file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket22.txt], file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt=[file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket21.txt, file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket23.txt]} + b {file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket20.txt, file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket22.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket21.txt, file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket23.txt]} Needs Tagging: false Path -> Alias: - file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/srcbucket_mapjoin [a] + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin [a] Path -> Partition: - file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/srcbucket_mapjoin + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin Partition base file name: srcbucket_mapjoin input format: org.apache.hadoop.mapred.TextInputFormat @@ -243,12 +255,12 @@ columns.types int:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/srcbucket_mapjoin + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin name srcbucket_mapjoin serialization.ddl struct srcbucket_mapjoin { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449190 + transient_lastDdlTime 1266607963 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -260,12 +272,12 @@ columns.types int:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/srcbucket_mapjoin + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin name srcbucket_mapjoin serialization.ddl struct srcbucket_mapjoin { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449190 + transient_lastDdlTime 1266607963 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: srcbucket_mapjoin name: srcbucket_mapjoin @@ -277,14 +289,14 @@ Move Operator files: hdfs directory: true - source: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-26-32_403_4982018782937064172/10002 - destination: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-26-32_403_4982018782937064172/10000 + source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-32-45_651_5720285813227132874/10002 + destination: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-32-45_651_5720285813227132874/10000 Stage: Stage-0 Move Operator tables: replace: true - source: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-26-32_403_4982018782937064172/10000 + source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-32-45_651_5720285813227132874/10000 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -294,20 +306,20 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449192 + transient_lastDdlTime 1266607965 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result - tmp directory: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-26-32_403_4982018782937064172/10001 + tmp directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-32-45_651_5720285813227132874/10001 Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: - file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-26-32_403_4982018782937064172/10002 + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-32-45_651_5720285813227132874/10002 Reduce Output Operator sort order: Map-reduce partition columns: @@ -323,9 +335,9 @@ type: string Needs Tagging: false Path -> Alias: - file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-26-32_403_4982018782937064172/10002 [file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-26-32_403_4982018782937064172/10002] + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-32-45_651_5720285813227132874/10002 [file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-32-45_651_5720285813227132874/10002] Path -> Partition: - file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-26-32_403_4982018782937064172/10002 + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-32-45_651_5720285813227132874/10002 Partition base file name: 10002 input format: org.apache.hadoop.mapred.TextInputFormat @@ -336,12 +348,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449192 + transient_lastDdlTime 1266607965 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -352,12 +364,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449192 + transient_lastDdlTime 1266607965 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result name: bucketmapjoin_tmp_result @@ -366,7 +378,7 @@ File Output Operator compressed: false GlobalTableId: 0 - directory: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-26-32_403_4982018782937064172/10000 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-32-45_651_5720285813227132874/10000 NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -377,12 +389,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449192 + transient_lastDdlTime 1266607965 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result TotalFiles: 1 @@ -408,12 +420,72 @@ PREHOOK: query: select count(1) from bucketmapjoin_tmp_result PREHOOK: type: QUERY PREHOOK: Input: default@bucketmapjoin_tmp_result -PREHOOK: Output: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-26-40_409_8227758911905677185/10000 +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-33-40_529_7309307251449693243/10000 +POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-33-40_529_7309307251449693243/10000 +464 +PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_1 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: default@bucketmapjoin_hash_result_1 +POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_1 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: default@bucketmapjoin_hash_result_1 +PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key where b.ds="2008-04-08" +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: Input: default@srcbucket_mapjoin +PREHOOK: Output: default@bucketmapjoin_tmp_result +POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key where b.ds="2008-04-08" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: Input: default@srcbucket_mapjoin +POSTHOOK: Output: default@bucketmapjoin_tmp_result +PREHOOK: query: select count(1) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-34-13_504_2943830036603525744/10000 POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result POSTHOOK: type: QUERY POSTHOOK: Input: default@bucketmapjoin_tmp_result -POSTHOOK: Output: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-26-40_409_8227758911905677185/10000 +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-34-13_504_2943830036603525744/10000 464 +PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: default@bucketmapjoin_hash_result_2 +POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: default@bucketmapjoin_hash_result_2 +PREHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 +from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_hash_result_2 +PREHOOK: Input: default@bucketmapjoin_hash_result_1 +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-34-24_292_8596094265458408364/10000 +POSTHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 +from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_hash_result_2 +POSTHOOK: Input: default@bucketmapjoin_hash_result_1 +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-34-24_292_8596094265458408364/10000 +0 0 0 PREHOOK: query: explain extended insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(a)*/ a.key, a.value, b.value @@ -488,7 +560,7 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-26-44_802_8678513451756118730/10002 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-34-30_210_4006386356890896710/10002 NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -499,12 +571,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449192 + transient_lastDdlTime 1266607965 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result TotalFiles: 1 @@ -559,7 +631,7 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-26-44_802_8678513451756118730/10002 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-34-30_210_4006386356890896710/10002 NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -570,21 +642,26 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449192 + transient_lastDdlTime 1266607965 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result TotalFiles: 1 MultiFileSpray: false + Bucket Mapjoin Context: + Alias Bucket Base File Name Mapping: + a {srcbucket20.txt=[srcbucket20.txt], srcbucket21.txt=[srcbucket21.txt], srcbucket22.txt=[srcbucket20.txt], srcbucket23.txt=[srcbucket21.txt]} + Alias Bucket File Name Mapping: + a {file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket20.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket21.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket22.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket23.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt]} Needs Tagging: false Path -> Alias: - file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08 [b] + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08 [b] Path -> Partition: - file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08 + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08 Partition base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat @@ -598,13 +675,13 @@ columns.types int:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/srcbucket_mapjoin_part + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part name srcbucket_mapjoin_part partition_columns ds serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449191 + transient_lastDdlTime 1266607963 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -616,13 +693,13 @@ columns.types int:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/srcbucket_mapjoin_part + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part name srcbucket_mapjoin_part partition_columns ds serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449191 + transient_lastDdlTime 1266607963 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: srcbucket_mapjoin_part name: srcbucket_mapjoin_part @@ -634,14 +711,14 @@ Move Operator files: hdfs directory: true - source: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-26-44_802_8678513451756118730/10002 - destination: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-26-44_802_8678513451756118730/10000 + source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-34-30_210_4006386356890896710/10002 + destination: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-34-30_210_4006386356890896710/10000 Stage: Stage-0 Move Operator tables: replace: true - source: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-26-44_802_8678513451756118730/10000 + source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-34-30_210_4006386356890896710/10000 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -651,20 +728,20 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449192 + transient_lastDdlTime 1266607965 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result - tmp directory: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-26-44_802_8678513451756118730/10001 + tmp directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-34-30_210_4006386356890896710/10001 Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: - file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-26-44_802_8678513451756118730/10002 + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-34-30_210_4006386356890896710/10002 Reduce Output Operator sort order: Map-reduce partition columns: @@ -680,9 +757,9 @@ type: string Needs Tagging: false Path -> Alias: - file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-26-44_802_8678513451756118730/10002 [file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-26-44_802_8678513451756118730/10002] + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-34-30_210_4006386356890896710/10002 [file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-34-30_210_4006386356890896710/10002] Path -> Partition: - file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-26-44_802_8678513451756118730/10002 + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-34-30_210_4006386356890896710/10002 Partition base file name: 10002 input format: org.apache.hadoop.mapred.TextInputFormat @@ -693,12 +770,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449192 + transient_lastDdlTime 1266607965 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -709,12 +786,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449192 + transient_lastDdlTime 1266607965 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result name: bucketmapjoin_tmp_result @@ -723,7 +800,7 @@ File Output Operator compressed: false GlobalTableId: 0 - directory: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-26-44_802_8678513451756118730/10000 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-34-30_210_4006386356890896710/10000 NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -734,12 +811,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449192 + transient_lastDdlTime 1266607965 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result TotalFiles: 1 @@ -765,12 +842,82 @@ PREHOOK: query: select count(1) from bucketmapjoin_tmp_result PREHOOK: type: QUERY PREHOOK: Input: default@bucketmapjoin_tmp_result -PREHOOK: Output: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-26-53_938_452605270621304413/10000 +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-34-46_749_4368271864493620888/10000 +POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-34-46_749_4368271864493620888/10000 +464 +PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_1 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: default@bucketmapjoin_hash_result_1 +POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_1 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: default@bucketmapjoin_hash_result_1 +PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key where b.ds="2008-04-08" +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: Input: default@srcbucket_mapjoin +PREHOOK: Output: default@bucketmapjoin_tmp_result +POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key where b.ds="2008-04-08" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: Input: default@srcbucket_mapjoin +POSTHOOK: Output: default@bucketmapjoin_tmp_result +PREHOOK: query: select count(1) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-35-16_173_3799786717539062143/10000 POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result POSTHOOK: type: QUERY POSTHOOK: Input: default@bucketmapjoin_tmp_result -POSTHOOK: Output: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-26-53_938_452605270621304413/10000 +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-35-16_173_3799786717539062143/10000 464 +PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: default@bucketmapjoin_hash_result_2 +POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: default@bucketmapjoin_hash_result_2 +PREHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 +from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_hash_result_2 +PREHOOK: Input: default@bucketmapjoin_hash_result_1 +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-35-24_981_4812298821055680946/10000 +POSTHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 +from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_hash_result_2 +POSTHOOK: Input: default@bucketmapjoin_hash_result_1 +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-35-24_981_4812298821055680946/10000 +0 0 0 +PREHOOK: query: drop table bucketmapjoin_hash_result_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table bucketmapjoin_hash_result_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: default@bucketmapjoin_hash_result_2 +PREHOOK: query: drop table bucketmapjoin_hash_result_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table bucketmapjoin_hash_result_1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: default@bucketmapjoin_hash_result_1 PREHOOK: query: drop table bucketmapjoin_tmp_result PREHOOK: type: DROPTABLE POSTHOOK: query: drop table bucketmapjoin_tmp_result Index: ql/src/test/results/clientpositive/bucketmapjoin2.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketmapjoin2.q.out (revision 911664) +++ ql/src/test/results/clientpositive/bucketmapjoin2.q.out (working copy) @@ -53,6 +53,16 @@ POSTHOOK: query: load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') POSTHOOK: type: LOAD POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +PREHOOK: query: create table bucketmapjoin_hash_result_1 (key bigint , value1 bigint, value2 bigint) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table bucketmapjoin_hash_result_1 (key bigint , value1 bigint, value2 bigint) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@bucketmapjoin_hash_result_1 +PREHOOK: query: create table bucketmapjoin_hash_result_2 (key bigint , value1 bigint, value2 bigint) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table bucketmapjoin_hash_result_2 (key bigint , value1 bigint, value2 bigint) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@bucketmapjoin_hash_result_2 PREHOOK: query: create table bucketmapjoin_tmp_result (key string , value1 string, value2 string) PREHOOK: type: CREATETABLE POSTHOOK: query: create table bucketmapjoin_tmp_result (key string , value1 string, value2 string) @@ -120,7 +130,7 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-27-02_156_8504430090802925489/10002 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-50-22_114_5685049556718200755/10002 NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -131,12 +141,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449222 + transient_lastDdlTime 1266609022 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result TotalFiles: 1 @@ -194,7 +204,7 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-27-02_156_8504430090802925489/10002 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-50-22_114_5685049556718200755/10002 NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -205,24 +215,26 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449222 + transient_lastDdlTime 1266609022 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result TotalFiles: 1 MultiFileSpray: false Bucket Mapjoin Context: + Alias Bucket Base File Name Mapping: + b {srcbucket20.txt=[srcbucket22.txt], srcbucket21.txt=[srcbucket23.txt]} Alias Bucket File Name Mapping: - b {file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt=[file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket22.txt], file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt=[file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket23.txt]} + b {file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket22.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket23.txt]} Needs Tagging: false Path -> Alias: - file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/srcbucket_mapjoin [a] + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin [a] Path -> Partition: - file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/srcbucket_mapjoin + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin Partition base file name: srcbucket_mapjoin input format: org.apache.hadoop.mapred.TextInputFormat @@ -234,12 +246,12 @@ columns.types int:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/srcbucket_mapjoin + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin name srcbucket_mapjoin serialization.ddl struct srcbucket_mapjoin { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449220 + transient_lastDdlTime 1266609019 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -251,12 +263,12 @@ columns.types int:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/srcbucket_mapjoin + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin name srcbucket_mapjoin serialization.ddl struct srcbucket_mapjoin { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449220 + transient_lastDdlTime 1266609019 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: srcbucket_mapjoin name: srcbucket_mapjoin @@ -268,14 +280,14 @@ Move Operator files: hdfs directory: true - source: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-27-02_156_8504430090802925489/10002 - destination: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-27-02_156_8504430090802925489/10000 + source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-50-22_114_5685049556718200755/10002 + destination: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-50-22_114_5685049556718200755/10000 Stage: Stage-0 Move Operator tables: replace: true - source: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-27-02_156_8504430090802925489/10000 + source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-50-22_114_5685049556718200755/10000 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -285,20 +297,20 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449222 + transient_lastDdlTime 1266609022 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result - tmp directory: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-27-02_156_8504430090802925489/10001 + tmp directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-50-22_114_5685049556718200755/10001 Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: - file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-27-02_156_8504430090802925489/10002 + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-50-22_114_5685049556718200755/10002 Reduce Output Operator sort order: Map-reduce partition columns: @@ -314,9 +326,9 @@ type: string Needs Tagging: false Path -> Alias: - file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-27-02_156_8504430090802925489/10002 [file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-27-02_156_8504430090802925489/10002] + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-50-22_114_5685049556718200755/10002 [file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-50-22_114_5685049556718200755/10002] Path -> Partition: - file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-27-02_156_8504430090802925489/10002 + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-50-22_114_5685049556718200755/10002 Partition base file name: 10002 input format: org.apache.hadoop.mapred.TextInputFormat @@ -327,12 +339,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449222 + transient_lastDdlTime 1266609022 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -343,12 +355,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449222 + transient_lastDdlTime 1266609022 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result name: bucketmapjoin_tmp_result @@ -357,7 +369,7 @@ File Output Operator compressed: false GlobalTableId: 0 - directory: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-27-02_156_8504430090802925489/10000 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-50-22_114_5685049556718200755/10000 NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -368,12 +380,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449222 + transient_lastDdlTime 1266609022 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result TotalFiles: 1 @@ -399,12 +411,72 @@ PREHOOK: query: select count(1) from bucketmapjoin_tmp_result PREHOOK: type: QUERY PREHOOK: Input: default@bucketmapjoin_tmp_result -PREHOOK: Output: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-27-09_684_3122080475091290193/10000 +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-50-40_635_8569673925161198262/10000 +POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-50-40_635_8569673925161198262/10000 +0 +PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_1 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: default@bucketmapjoin_hash_result_1 +POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_1 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: default@bucketmapjoin_hash_result_1 +PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b +on a.key=b.key and b.ds="2008-04-08" +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +PREHOOK: Input: default@srcbucket_mapjoin +PREHOOK: Output: default@bucketmapjoin_tmp_result +POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b +on a.key=b.key and b.ds="2008-04-08" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +POSTHOOK: Input: default@srcbucket_mapjoin +POSTHOOK: Output: default@bucketmapjoin_tmp_result +PREHOOK: query: select count(1) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-50-59_949_7805354233228464790/10000 POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result POSTHOOK: type: QUERY POSTHOOK: Input: default@bucketmapjoin_tmp_result -POSTHOOK: Output: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-27-09_684_3122080475091290193/10000 +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-50-59_949_7805354233228464790/10000 0 +PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: default@bucketmapjoin_hash_result_2 +POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: default@bucketmapjoin_hash_result_2 +PREHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 +from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_hash_result_2 +PREHOOK: Input: default@bucketmapjoin_hash_result_1 +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-51-08_950_3309411335880304858/10000 +POSTHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 +from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_hash_result_2 +POSTHOOK: Input: default@bucketmapjoin_hash_result_1 +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-51-08_950_3309411335880304858/10000 +NULL NULL NULL PREHOOK: query: explain extended insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(a)*/ a.key, a.value, b.value @@ -477,7 +549,7 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-27-13_026_1200606493732537610/10002 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-51-14_717_4570401978130192078/10002 NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -488,12 +560,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449222 + transient_lastDdlTime 1266609022 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result TotalFiles: 1 @@ -541,7 +613,7 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-27-13_026_1200606493732537610/10002 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-51-14_717_4570401978130192078/10002 NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -552,21 +624,26 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449222 + transient_lastDdlTime 1266609022 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result TotalFiles: 1 MultiFileSpray: false + Bucket Mapjoin Context: + Alias Bucket Base File Name Mapping: + a {srcbucket22.txt=[srcbucket20.txt], srcbucket23.txt=[srcbucket21.txt]} + Alias Bucket File Name Mapping: + a {file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket22.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket23.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt]} Needs Tagging: false Path -> Alias: - file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08 [b] + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08 [b] Path -> Partition: - file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08 + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08 Partition base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat @@ -580,13 +657,13 @@ columns.types int:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2 + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2 name srcbucket_mapjoin_part_2 partition_columns ds serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449221 + transient_lastDdlTime 1266609021 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -598,13 +675,13 @@ columns.types int:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2 + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2 name srcbucket_mapjoin_part_2 partition_columns ds serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449221 + transient_lastDdlTime 1266609021 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: srcbucket_mapjoin_part_2 name: srcbucket_mapjoin_part_2 @@ -616,14 +693,14 @@ Move Operator files: hdfs directory: true - source: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-27-13_026_1200606493732537610/10002 - destination: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-27-13_026_1200606493732537610/10000 + source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-51-14_717_4570401978130192078/10002 + destination: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-51-14_717_4570401978130192078/10000 Stage: Stage-0 Move Operator tables: replace: true - source: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-27-13_026_1200606493732537610/10000 + source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-51-14_717_4570401978130192078/10000 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -633,20 +710,20 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449222 + transient_lastDdlTime 1266609022 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result - tmp directory: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-27-13_026_1200606493732537610/10001 + tmp directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-51-14_717_4570401978130192078/10001 Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: - file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-27-13_026_1200606493732537610/10002 + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-51-14_717_4570401978130192078/10002 Reduce Output Operator sort order: Map-reduce partition columns: @@ -662,9 +739,9 @@ type: string Needs Tagging: false Path -> Alias: - file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-27-13_026_1200606493732537610/10002 [file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-27-13_026_1200606493732537610/10002] + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-51-14_717_4570401978130192078/10002 [file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-51-14_717_4570401978130192078/10002] Path -> Partition: - file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-27-13_026_1200606493732537610/10002 + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-51-14_717_4570401978130192078/10002 Partition base file name: 10002 input format: org.apache.hadoop.mapred.TextInputFormat @@ -675,12 +752,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449222 + transient_lastDdlTime 1266609022 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -691,12 +768,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449222 + transient_lastDdlTime 1266609022 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result name: bucketmapjoin_tmp_result @@ -705,7 +782,7 @@ File Output Operator compressed: false GlobalTableId: 0 - directory: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-27-13_026_1200606493732537610/10000 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-51-14_717_4570401978130192078/10000 NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -716,12 +793,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449222 + transient_lastDdlTime 1266609022 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result TotalFiles: 1 @@ -747,12 +824,82 @@ PREHOOK: query: select count(1) from bucketmapjoin_tmp_result PREHOOK: type: QUERY PREHOOK: Input: default@bucketmapjoin_tmp_result -PREHOOK: Output: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-27-21_012_5331298714594796801/10000 +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-51-24_366_6146696378003552872/10000 +POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-51-24_366_6146696378003552872/10000 +0 +PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_1 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: default@bucketmapjoin_hash_result_1 +POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_1 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: default@bucketmapjoin_hash_result_1 +PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b +on a.key=b.key and b.ds="2008-04-08" +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +PREHOOK: Input: default@srcbucket_mapjoin +PREHOOK: Output: default@bucketmapjoin_tmp_result +POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b +on a.key=b.key and b.ds="2008-04-08" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +POSTHOOK: Input: default@srcbucket_mapjoin +POSTHOOK: Output: default@bucketmapjoin_tmp_result +PREHOOK: query: select count(1) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-51-49_208_404696104645079786/10000 POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result POSTHOOK: type: QUERY POSTHOOK: Input: default@bucketmapjoin_tmp_result -POSTHOOK: Output: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-27-21_012_5331298714594796801/10000 +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-51-49_208_404696104645079786/10000 0 +PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: default@bucketmapjoin_hash_result_2 +POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: default@bucketmapjoin_hash_result_2 +PREHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 +from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_hash_result_2 +PREHOOK: Input: default@bucketmapjoin_hash_result_1 +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-51-57_967_8324054071428985023/10000 +POSTHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 +from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_hash_result_2 +POSTHOOK: Input: default@bucketmapjoin_hash_result_1 +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-51-57_967_8324054071428985023/10000 +NULL NULL NULL +PREHOOK: query: drop table bucketmapjoin_hash_result_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table bucketmapjoin_hash_result_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: default@bucketmapjoin_hash_result_2 +PREHOOK: query: drop table bucketmapjoin_hash_result_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table bucketmapjoin_hash_result_1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: default@bucketmapjoin_hash_result_1 PREHOOK: query: drop table bucketmapjoin_tmp_result PREHOOK: type: DROPTABLE POSTHOOK: query: drop table bucketmapjoin_tmp_result Index: ql/src/test/results/clientpositive/bucketmapjoin3.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketmapjoin3.q.out (revision 911664) +++ ql/src/test/results/clientpositive/bucketmapjoin3.q.out (working copy) @@ -53,6 +53,16 @@ POSTHOOK: query: load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') POSTHOOK: type: LOAD POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +PREHOOK: query: create table bucketmapjoin_hash_result_1 (key bigint , value1 bigint, value2 bigint) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table bucketmapjoin_hash_result_1 (key bigint , value1 bigint, value2 bigint) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@bucketmapjoin_hash_result_1 +PREHOOK: query: create table bucketmapjoin_hash_result_2 (key bigint , value1 bigint, value2 bigint) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table bucketmapjoin_hash_result_2 (key bigint , value1 bigint, value2 bigint) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@bucketmapjoin_hash_result_2 PREHOOK: query: create table bucketmapjoin_tmp_result (key string , value1 string, value2 string) PREHOOK: type: CREATETABLE POSTHOOK: query: create table bucketmapjoin_tmp_result (key string , value1 string, value2 string) @@ -130,7 +140,7 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-27-29_479_5856176757023458430/10002 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-56-37_229_8485850196062448955/10002 NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -141,12 +151,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449249 + transient_lastDdlTime 1266609397 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result TotalFiles: 1 @@ -204,7 +214,7 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-27-29_479_5856176757023458430/10002 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-56-37_229_8485850196062448955/10002 NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -215,24 +225,26 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449249 + transient_lastDdlTime 1266609397 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result TotalFiles: 1 MultiFileSpray: false Bucket Mapjoin Context: + Alias Bucket Base File Name Mapping: + b {srcbucket22.txt=[srcbucket20.txt, srcbucket22.txt], srcbucket23.txt=[srcbucket21.txt, srcbucket23.txt]} Alias Bucket File Name Mapping: - b {file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket22.txt=[file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket20.txt, file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket22.txt], file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket23.txt=[file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket21.txt, file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket23.txt]} + b {file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket22.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket20.txt, file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket22.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket23.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket21.txt, file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket23.txt]} Needs Tagging: false Path -> Alias: - file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08 [a] + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08 [a] Path -> Partition: - file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08 + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08 Partition base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat @@ -246,13 +258,13 @@ columns.types int:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2 + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2 name srcbucket_mapjoin_part_2 partition_columns ds serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449249 + transient_lastDdlTime 1266609396 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -264,13 +276,13 @@ columns.types int:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2 + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2 name srcbucket_mapjoin_part_2 partition_columns ds serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449249 + transient_lastDdlTime 1266609396 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: srcbucket_mapjoin_part_2 name: srcbucket_mapjoin_part_2 @@ -282,14 +294,14 @@ Move Operator files: hdfs directory: true - source: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-27-29_479_5856176757023458430/10002 - destination: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-27-29_479_5856176757023458430/10000 + source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-56-37_229_8485850196062448955/10002 + destination: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-56-37_229_8485850196062448955/10000 Stage: Stage-0 Move Operator tables: replace: true - source: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-27-29_479_5856176757023458430/10000 + source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-56-37_229_8485850196062448955/10000 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -299,20 +311,20 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449249 + transient_lastDdlTime 1266609397 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result - tmp directory: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-27-29_479_5856176757023458430/10001 + tmp directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-56-37_229_8485850196062448955/10001 Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: - file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-27-29_479_5856176757023458430/10002 + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-56-37_229_8485850196062448955/10002 Reduce Output Operator sort order: Map-reduce partition columns: @@ -328,9 +340,9 @@ type: string Needs Tagging: false Path -> Alias: - file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-27-29_479_5856176757023458430/10002 [file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-27-29_479_5856176757023458430/10002] + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-56-37_229_8485850196062448955/10002 [file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-56-37_229_8485850196062448955/10002] Path -> Partition: - file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-27-29_479_5856176757023458430/10002 + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-56-37_229_8485850196062448955/10002 Partition base file name: 10002 input format: org.apache.hadoop.mapred.TextInputFormat @@ -341,12 +353,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449249 + transient_lastDdlTime 1266609397 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -357,12 +369,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449249 + transient_lastDdlTime 1266609397 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result name: bucketmapjoin_tmp_result @@ -371,7 +383,7 @@ File Output Operator compressed: false GlobalTableId: 0 - directory: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-27-29_479_5856176757023458430/10000 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-56-37_229_8485850196062448955/10000 NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -382,12 +394,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449249 + transient_lastDdlTime 1266609397 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result TotalFiles: 1 @@ -413,12 +425,72 @@ PREHOOK: query: select count(1) from bucketmapjoin_tmp_result PREHOOK: type: QUERY PREHOOK: Input: default@bucketmapjoin_tmp_result -PREHOOK: Output: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-27-40_366_983557285282275690/10000 +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-56-55_675_87981444967122492/10000 +POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-56-55_675_87981444967122492/10000 +564 +PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_1 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: default@bucketmapjoin_hash_result_1 +POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_1 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: default@bucketmapjoin_hash_result_1 +PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin_part_2 a join srcbucket_mapjoin_part b +on a.key=b.key and b.ds="2008-04-08" and a.ds="2008-04-08" +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +PREHOOK: Output: default@bucketmapjoin_tmp_result +POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin_part_2 a join srcbucket_mapjoin_part b +on a.key=b.key and b.ds="2008-04-08" and a.ds="2008-04-08" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +POSTHOOK: Output: default@bucketmapjoin_tmp_result +PREHOOK: query: select count(1) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-57-17_859_7452928906719746215/10000 POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result POSTHOOK: type: QUERY POSTHOOK: Input: default@bucketmapjoin_tmp_result -POSTHOOK: Output: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-27-40_366_983557285282275690/10000 +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-57-17_859_7452928906719746215/10000 564 +PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: default@bucketmapjoin_hash_result_2 +POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: default@bucketmapjoin_hash_result_2 +PREHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 +from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_hash_result_2 +PREHOOK: Input: default@bucketmapjoin_hash_result_1 +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-57-27_860_629807601940185495/10000 +POSTHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 +from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_hash_result_2 +POSTHOOK: Input: default@bucketmapjoin_hash_result_1 +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-57-27_860_629807601940185495/10000 +0 0 0 PREHOOK: query: explain extended insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(a)*/ a.key, a.value, b.value @@ -491,7 +563,7 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-27-43_810_7900906330280987524/10002 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-57-33_147_375226016152069552/10002 NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -502,12 +574,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449249 + transient_lastDdlTime 1266609397 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result TotalFiles: 1 @@ -565,7 +637,7 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-27-43_810_7900906330280987524/10002 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-57-33_147_375226016152069552/10002 NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -576,21 +648,26 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449249 + transient_lastDdlTime 1266609397 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result TotalFiles: 1 MultiFileSpray: false + Bucket Mapjoin Context: + Alias Bucket Base File Name Mapping: + a {srcbucket20.txt=[srcbucket22.txt], srcbucket21.txt=[srcbucket23.txt], srcbucket22.txt=[srcbucket22.txt], srcbucket23.txt=[srcbucket23.txt]} + Alias Bucket File Name Mapping: + a {file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket20.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket22.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket21.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket23.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket22.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket22.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket23.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket23.txt]} Needs Tagging: false Path -> Alias: - file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08 [b] + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08 [b] Path -> Partition: - file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08 + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08 Partition base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat @@ -604,13 +681,13 @@ columns.types int:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/srcbucket_mapjoin_part + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part name srcbucket_mapjoin_part partition_columns ds serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449248 + transient_lastDdlTime 1266609395 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -622,13 +699,13 @@ columns.types int:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/srcbucket_mapjoin_part + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part name srcbucket_mapjoin_part partition_columns ds serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449248 + transient_lastDdlTime 1266609395 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: srcbucket_mapjoin_part name: srcbucket_mapjoin_part @@ -640,14 +717,14 @@ Move Operator files: hdfs directory: true - source: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-27-43_810_7900906330280987524/10002 - destination: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-27-43_810_7900906330280987524/10000 + source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-57-33_147_375226016152069552/10002 + destination: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-57-33_147_375226016152069552/10000 Stage: Stage-0 Move Operator tables: replace: true - source: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-27-43_810_7900906330280987524/10000 + source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-57-33_147_375226016152069552/10000 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -657,20 +734,20 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449249 + transient_lastDdlTime 1266609397 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result - tmp directory: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-27-43_810_7900906330280987524/10001 + tmp directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-57-33_147_375226016152069552/10001 Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: - file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-27-43_810_7900906330280987524/10002 + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-57-33_147_375226016152069552/10002 Reduce Output Operator sort order: Map-reduce partition columns: @@ -686,9 +763,9 @@ type: string Needs Tagging: false Path -> Alias: - file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-27-43_810_7900906330280987524/10002 [file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-27-43_810_7900906330280987524/10002] + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-57-33_147_375226016152069552/10002 [file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-57-33_147_375226016152069552/10002] Path -> Partition: - file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-27-43_810_7900906330280987524/10002 + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-57-33_147_375226016152069552/10002 Partition base file name: 10002 input format: org.apache.hadoop.mapred.TextInputFormat @@ -699,12 +776,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449249 + transient_lastDdlTime 1266609397 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -715,12 +792,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449249 + transient_lastDdlTime 1266609397 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result name: bucketmapjoin_tmp_result @@ -729,7 +806,7 @@ File Output Operator compressed: false GlobalTableId: 0 - directory: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-27-43_810_7900906330280987524/10000 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-57-33_147_375226016152069552/10000 NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -740,12 +817,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449249 + transient_lastDdlTime 1266609397 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result TotalFiles: 1 @@ -771,12 +848,82 @@ PREHOOK: query: select count(1) from bucketmapjoin_tmp_result PREHOOK: type: QUERY PREHOOK: Input: default@bucketmapjoin_tmp_result -PREHOOK: Output: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-27-54_572_5652144002062223669/10000 +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-57-45_370_2329158614359725032/10000 +POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-57-45_370_2329158614359725032/10000 +564 +PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: default@bucketmapjoin_hash_result_2 +POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: default@bucketmapjoin_hash_result_2 +PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin_part_2 a join srcbucket_mapjoin_part b +on a.key=b.key and b.ds="2008-04-08" and a.ds="2008-04-08" +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +PREHOOK: Output: default@bucketmapjoin_tmp_result +POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin_part_2 a join srcbucket_mapjoin_part b +on a.key=b.key and b.ds="2008-04-08" and a.ds="2008-04-08" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +POSTHOOK: Output: default@bucketmapjoin_tmp_result +PREHOOK: query: select count(1) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-58-08_131_2181595303285853240/10000 POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result POSTHOOK: type: QUERY POSTHOOK: Input: default@bucketmapjoin_tmp_result -POSTHOOK: Output: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-27-54_572_5652144002062223669/10000 +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-58-08_131_2181595303285853240/10000 564 +PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: default@bucketmapjoin_hash_result_2 +POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: default@bucketmapjoin_hash_result_2 +PREHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 +from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_hash_result_2 +PREHOOK: Input: default@bucketmapjoin_hash_result_1 +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-58-27_520_4734833003819934734/10000 +POSTHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 +from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_hash_result_2 +POSTHOOK: Input: default@bucketmapjoin_hash_result_1 +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_11-58-27_520_4734833003819934734/10000 +0 0 0 +PREHOOK: query: drop table bucketmapjoin_hash_result_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table bucketmapjoin_hash_result_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: default@bucketmapjoin_hash_result_2 +PREHOOK: query: drop table bucketmapjoin_hash_result_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table bucketmapjoin_hash_result_1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: default@bucketmapjoin_hash_result_1 PREHOOK: query: drop table bucketmapjoin_tmp_result PREHOOK: type: DROPTABLE POSTHOOK: query: drop table bucketmapjoin_tmp_result Index: ql/src/test/results/clientpositive/bucketmapjoin4.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketmapjoin4.q.out (revision 911664) +++ ql/src/test/results/clientpositive/bucketmapjoin4.q.out (working copy) @@ -53,6 +53,16 @@ POSTHOOK: query: load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') POSTHOOK: type: LOAD POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +PREHOOK: query: create table bucketmapjoin_hash_result_1 (key bigint , value1 bigint, value2 bigint) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table bucketmapjoin_hash_result_1 (key bigint , value1 bigint, value2 bigint) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@bucketmapjoin_hash_result_1 +PREHOOK: query: create table bucketmapjoin_hash_result_2 (key bigint , value1 bigint, value2 bigint) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table bucketmapjoin_hash_result_2 (key bigint , value1 bigint, value2 bigint) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@bucketmapjoin_hash_result_2 PREHOOK: query: create table bucketmapjoin_tmp_result (key string , value1 string, value2 string) PREHOOK: type: CREATETABLE POSTHOOK: query: create table bucketmapjoin_tmp_result (key string , value1 string, value2 string) @@ -120,7 +130,7 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-28-37_939_6661116731646550250/10002 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-02-13_110_7668606820926973462/10002 NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -131,12 +141,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449317 + transient_lastDdlTime 1266609733 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result TotalFiles: 1 @@ -184,7 +194,7 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-28-37_939_6661116731646550250/10002 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-02-13_110_7668606820926973462/10002 NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -195,24 +205,26 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449317 + transient_lastDdlTime 1266609733 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result TotalFiles: 1 MultiFileSpray: false Bucket Mapjoin Context: + Alias Bucket Base File Name Mapping: + b {srcbucket20.txt=[srcbucket20.txt], srcbucket21.txt=[srcbucket21.txt]} Alias Bucket File Name Mapping: - b {file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt=[file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt=[file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt]} + b {file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt]} Needs Tagging: false Path -> Alias: - file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/srcbucket_mapjoin [a] + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin [a] Path -> Partition: - file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/srcbucket_mapjoin + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin Partition base file name: srcbucket_mapjoin input format: org.apache.hadoop.mapred.TextInputFormat @@ -224,12 +236,12 @@ columns.types int:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/srcbucket_mapjoin + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin name srcbucket_mapjoin serialization.ddl struct srcbucket_mapjoin { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449316 + transient_lastDdlTime 1266609730 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -241,12 +253,12 @@ columns.types int:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/srcbucket_mapjoin + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin name srcbucket_mapjoin serialization.ddl struct srcbucket_mapjoin { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449316 + transient_lastDdlTime 1266609730 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: srcbucket_mapjoin name: srcbucket_mapjoin @@ -258,14 +270,14 @@ Move Operator files: hdfs directory: true - source: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-28-37_939_6661116731646550250/10002 - destination: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-28-37_939_6661116731646550250/10000 + source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-02-13_110_7668606820926973462/10002 + destination: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-02-13_110_7668606820926973462/10000 Stage: Stage-0 Move Operator tables: replace: true - source: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-28-37_939_6661116731646550250/10000 + source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-02-13_110_7668606820926973462/10000 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -275,20 +287,20 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449317 + transient_lastDdlTime 1266609733 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result - tmp directory: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-28-37_939_6661116731646550250/10001 + tmp directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-02-13_110_7668606820926973462/10001 Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: - file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-28-37_939_6661116731646550250/10002 + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-02-13_110_7668606820926973462/10002 Reduce Output Operator sort order: Map-reduce partition columns: @@ -304,9 +316,9 @@ type: string Needs Tagging: false Path -> Alias: - file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-28-37_939_6661116731646550250/10002 [file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-28-37_939_6661116731646550250/10002] + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-02-13_110_7668606820926973462/10002 [file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-02-13_110_7668606820926973462/10002] Path -> Partition: - file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-28-37_939_6661116731646550250/10002 + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-02-13_110_7668606820926973462/10002 Partition base file name: 10002 input format: org.apache.hadoop.mapred.TextInputFormat @@ -317,12 +329,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449317 + transient_lastDdlTime 1266609733 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -333,12 +345,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449317 + transient_lastDdlTime 1266609733 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result name: bucketmapjoin_tmp_result @@ -347,7 +359,7 @@ File Output Operator compressed: false GlobalTableId: 0 - directory: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-28-37_939_6661116731646550250/10000 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-02-13_110_7668606820926973462/10000 NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -358,12 +370,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449317 + transient_lastDdlTime 1266609733 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result TotalFiles: 1 @@ -387,12 +399,70 @@ PREHOOK: query: select count(1) from bucketmapjoin_tmp_result PREHOOK: type: QUERY PREHOOK: Input: default@bucketmapjoin_tmp_result -PREHOOK: Output: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-28-46_472_1713548732048317690/10000 +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-02-25_327_674590801295331715/10000 +POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-02-25_327_674590801295331715/10000 +464 +PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_1 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: default@bucketmapjoin_hash_result_1 +POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_1 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: default@bucketmapjoin_hash_result_1 +PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin b +on a.key=b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin +PREHOOK: Output: default@bucketmapjoin_tmp_result +POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin b +on a.key=b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin +POSTHOOK: Output: default@bucketmapjoin_tmp_result +PREHOOK: query: select count(1) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-02-46_453_8978367011666027973/10000 POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result POSTHOOK: type: QUERY POSTHOOK: Input: default@bucketmapjoin_tmp_result -POSTHOOK: Output: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-28-46_472_1713548732048317690/10000 +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-02-46_453_8978367011666027973/10000 464 +PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: default@bucketmapjoin_hash_result_2 +POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: default@bucketmapjoin_hash_result_2 +PREHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 +from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_hash_result_2 +PREHOOK: Input: default@bucketmapjoin_hash_result_1 +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-02-56_443_5241320946861710888/10000 +POSTHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 +from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_hash_result_2 +POSTHOOK: Input: default@bucketmapjoin_hash_result_1 +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-02-56_443_5241320946861710888/10000 +0 0 0 PREHOOK: query: explain extended insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(a)*/ a.key, a.value, b.value @@ -455,7 +525,7 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-28-49_907_8827927976145573227/10002 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-03-02_596_607036606424853820/10002 NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -466,12 +536,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449317 + transient_lastDdlTime 1266609733 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result TotalFiles: 1 @@ -519,7 +589,7 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-28-49_907_8827927976145573227/10002 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-03-02_596_607036606424853820/10002 NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -530,21 +600,26 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449317 + transient_lastDdlTime 1266609733 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result TotalFiles: 1 MultiFileSpray: false + Bucket Mapjoin Context: + Alias Bucket Base File Name Mapping: + a {srcbucket20.txt=[srcbucket20.txt], srcbucket21.txt=[srcbucket21.txt]} + Alias Bucket File Name Mapping: + a {file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt]} Needs Tagging: false Path -> Alias: - file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/srcbucket_mapjoin [b] + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin [b] Path -> Partition: - file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/srcbucket_mapjoin + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin Partition base file name: srcbucket_mapjoin input format: org.apache.hadoop.mapred.TextInputFormat @@ -556,12 +631,12 @@ columns.types int:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/srcbucket_mapjoin + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin name srcbucket_mapjoin serialization.ddl struct srcbucket_mapjoin { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449316 + transient_lastDdlTime 1266609730 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -573,12 +648,12 @@ columns.types int:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/srcbucket_mapjoin + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin name srcbucket_mapjoin serialization.ddl struct srcbucket_mapjoin { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449316 + transient_lastDdlTime 1266609730 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: srcbucket_mapjoin name: srcbucket_mapjoin @@ -590,14 +665,14 @@ Move Operator files: hdfs directory: true - source: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-28-49_907_8827927976145573227/10002 - destination: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-28-49_907_8827927976145573227/10000 + source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-03-02_596_607036606424853820/10002 + destination: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-03-02_596_607036606424853820/10000 Stage: Stage-0 Move Operator tables: replace: true - source: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-28-49_907_8827927976145573227/10000 + source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-03-02_596_607036606424853820/10000 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -607,20 +682,20 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449317 + transient_lastDdlTime 1266609733 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result - tmp directory: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-28-49_907_8827927976145573227/10001 + tmp directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-03-02_596_607036606424853820/10001 Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: - file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-28-49_907_8827927976145573227/10002 + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-03-02_596_607036606424853820/10002 Reduce Output Operator sort order: Map-reduce partition columns: @@ -636,9 +711,9 @@ type: string Needs Tagging: false Path -> Alias: - file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-28-49_907_8827927976145573227/10002 [file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-28-49_907_8827927976145573227/10002] + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-03-02_596_607036606424853820/10002 [file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-03-02_596_607036606424853820/10002] Path -> Partition: - file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-28-49_907_8827927976145573227/10002 + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-03-02_596_607036606424853820/10002 Partition base file name: 10002 input format: org.apache.hadoop.mapred.TextInputFormat @@ -649,12 +724,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449317 + transient_lastDdlTime 1266609733 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -665,12 +740,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449317 + transient_lastDdlTime 1266609733 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result name: bucketmapjoin_tmp_result @@ -679,7 +754,7 @@ File Output Operator compressed: false GlobalTableId: 0 - directory: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-28-49_907_8827927976145573227/10000 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-03-02_596_607036606424853820/10000 NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -690,12 +765,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1266449317 + transient_lastDdlTime 1266609733 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result TotalFiles: 1 @@ -719,12 +794,80 @@ PREHOOK: query: select count(1) from bucketmapjoin_tmp_result PREHOOK: type: QUERY PREHOOK: Input: default@bucketmapjoin_tmp_result -PREHOOK: Output: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-28-58_734_4207363820216741938/10000 +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-03-20_989_3505050887489317938/10000 +POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-03-20_989_3505050887489317938/10000 +464 +PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_1 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: default@bucketmapjoin_hash_result_1 +POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_1 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: default@bucketmapjoin_hash_result_1 +PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin b +on a.key=b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin +PREHOOK: Output: default@bucketmapjoin_tmp_result +POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin b +on a.key=b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin +POSTHOOK: Output: default@bucketmapjoin_tmp_result +PREHOOK: query: select count(1) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-03-39_812_4569073467679647480/10000 POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result POSTHOOK: type: QUERY POSTHOOK: Input: default@bucketmapjoin_tmp_result -POSTHOOK: Output: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-28-58_734_4207363820216741938/10000 +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-03-39_812_4569073467679647480/10000 464 +PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: default@bucketmapjoin_hash_result_2 +POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: default@bucketmapjoin_hash_result_2 +PREHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 +from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_hash_result_2 +PREHOOK: Input: default@bucketmapjoin_hash_result_1 +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-03-49_562_6182929831238239608/10000 +POSTHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 +from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_hash_result_2 +POSTHOOK: Input: default@bucketmapjoin_hash_result_1 +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-03-49_562_6182929831238239608/10000 +0 0 0 +PREHOOK: query: drop table bucketmapjoin_hash_result_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table bucketmapjoin_hash_result_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: default@bucketmapjoin_hash_result_2 +PREHOOK: query: drop table bucketmapjoin_hash_result_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table bucketmapjoin_hash_result_1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: default@bucketmapjoin_hash_result_1 PREHOOK: query: drop table bucketmapjoin_tmp_result PREHOOK: type: DROPTABLE POSTHOOK: query: drop table bucketmapjoin_tmp_result Index: ql/src/test/results/clientpositive/bucketmapjoin5.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketmapjoin5.q.out (revision 0) +++ ql/src/test/results/clientpositive/bucketmapjoin5.q.out (revision 0) @@ -0,0 +1,1020 @@ +PREHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@srcbucket_mapjoin_part +PREHOOK: query: load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-09 +PREHOOK: query: load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-09 +PREHOOK: query: load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-09 +PREHOOK: query: load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-09 +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +PREHOOK: query: load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +PREHOOK: query: load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +PREHOOK: query: load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-09 +PREHOOK: query: load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-09 +PREHOOK: query: create table bucketmapjoin_hash_result_1 (key bigint , value1 bigint, value2 bigint) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table bucketmapjoin_hash_result_1 (key bigint , value1 bigint, value2 bigint) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@bucketmapjoin_hash_result_1 +PREHOOK: query: create table bucketmapjoin_hash_result_2 (key bigint , value1 bigint, value2 bigint) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table bucketmapjoin_hash_result_2 (key bigint , value1 bigint, value2 bigint) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@bucketmapjoin_hash_result_2 +PREHOOK: query: create table bucketmapjoin_tmp_result (key string , value1 string, value2 string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table bucketmapjoin_tmp_result (key string , value1 string, value2 string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@bucketmapjoin_tmp_result +PREHOOK: query: explain extended +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF srcbucket_mapjoin a) (TOK_TABREF srcbucket_mapjoin_part b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB bucketmapjoin_tmp_result)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-4 depends on stages: Stage-1 , consists of Stage-3, Stage-2 + Stage-3 + Stage-0 depends on stages: Stage-3, Stage-2 + Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + b + TableScan + alias: b + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col1, _col3 + Position of Big Table: 1 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col3 + type: string + outputColumnNames: _col0, _col1, _col3 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col3 + type: string + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-04-02_446_6013292898594914381/10002 + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.types string:string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + name bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266609842 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: bucketmapjoin_tmp_result + TotalFiles: 1 + MultiFileSpray: false + Local Work: + Map Reduce Local Work + Alias -> Map Local Tables: + a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + a + TableScan + alias: a + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col1, _col3 + Position of Big Table: 1 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col3 + type: string + outputColumnNames: _col0, _col1, _col3 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col3 + type: string + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-04-02_446_6013292898594914381/10002 + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.types string:string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + name bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266609842 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: bucketmapjoin_tmp_result + TotalFiles: 1 + MultiFileSpray: false + Bucket Mapjoin Context: + Alias Bucket Base File Name Mapping: + a {srcbucket20.txt=[srcbucket20.txt], srcbucket21.txt=[srcbucket21.txt], srcbucket22.txt=[srcbucket20.txt], srcbucket23.txt=[srcbucket21.txt], ds=2008-04-09/srcbucket20.txt=[srcbucket20.txt], ds=2008-04-09/srcbucket21.txt=[srcbucket21.txt], ds=2008-04-09/srcbucket22.txt=[srcbucket20.txt], ds=2008-04-09/srcbucket23.txt=[srcbucket21.txt]} + Alias Bucket File Name Mapping: + a {file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket20.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket21.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket22.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket23.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09/srcbucket20.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09/srcbucket21.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09/srcbucket22.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09/srcbucket23.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt]} + Needs Tagging: false + Path -> Alias: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08 [b] + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09 [b] + Path -> Partition: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08 + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.types int:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part + name srcbucket_mapjoin_part + partition_columns ds + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266609840 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.types int:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part + name srcbucket_mapjoin_part + partition_columns ds + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266609840 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: srcbucket_mapjoin_part + name: srcbucket_mapjoin_part + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09 + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.types int:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part + name srcbucket_mapjoin_part + partition_columns ds + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266609840 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.types int:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part + name srcbucket_mapjoin_part + partition_columns ds + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266609840 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: srcbucket_mapjoin_part + name: srcbucket_mapjoin_part + + Stage: Stage-4 + Conditional Operator + + Stage: Stage-3 + Move Operator + files: + hdfs directory: true + source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-04-02_446_6013292898594914381/10002 + destination: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-04-02_446_6013292898594914381/10000 + + Stage: Stage-0 + Move Operator + tables: + replace: true + source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-04-02_446_6013292898594914381/10000 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.types string:string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + name bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266609842 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: bucketmapjoin_tmp_result + tmp directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-04-02_446_6013292898594914381/10001 + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-04-02_446_6013292898594914381/10002 + Reduce Output Operator + sort order: + Map-reduce partition columns: + expr: rand() + type: double + tag: -1 + value expressions: + expr: key + type: string + expr: value1 + type: string + expr: value2 + type: string + Needs Tagging: false + Path -> Alias: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-04-02_446_6013292898594914381/10002 [file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-04-02_446_6013292898594914381/10002] + Path -> Partition: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-04-02_446_6013292898594914381/10002 + Partition + base file name: 10002 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.types string:string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + name bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266609842 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.types string:string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + name bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266609842 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: bucketmapjoin_tmp_result + name: bucketmapjoin_tmp_result + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-04-02_446_6013292898594914381/10000 + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.types string:string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + name bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266609842 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: bucketmapjoin_tmp_result + TotalFiles: 1 + MultiFileSpray: false + + +PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-09 +PREHOOK: Input: default@srcbucket_mapjoin +PREHOOK: Output: default@bucketmapjoin_tmp_result +POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-09 +POSTHOOK: Input: default@srcbucket_mapjoin +POSTHOOK: Output: default@bucketmapjoin_tmp_result +PREHOOK: query: select count(1) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-04-18_053_756880030390690347/10000 +POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-04-18_053_756880030390690347/10000 +928 +PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_1 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: default@bucketmapjoin_hash_result_1 +POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_1 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: default@bucketmapjoin_hash_result_1 +PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-09 +PREHOOK: Input: default@srcbucket_mapjoin +PREHOOK: Output: default@bucketmapjoin_tmp_result +POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-09 +POSTHOOK: Input: default@srcbucket_mapjoin +POSTHOOK: Output: default@bucketmapjoin_tmp_result +PREHOOK: query: select count(1) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-04-44_287_5741637011126274087/10000 +POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-04-44_287_5741637011126274087/10000 +928 +PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: default@bucketmapjoin_hash_result_2 +POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: default@bucketmapjoin_hash_result_2 +PREHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 +from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_hash_result_2 +PREHOOK: Input: default@bucketmapjoin_hash_result_1 +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-04-54_761_3232996986837526948/10000 +POSTHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 +from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_hash_result_2 +POSTHOOK: Input: default@bucketmapjoin_hash_result_1 +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-04-54_761_3232996986837526948/10000 +0 0 0 +PREHOOK: query: explain extended +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b +on a.key=b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b +on a.key=b.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF srcbucket_mapjoin a) (TOK_TABREF srcbucket_mapjoin_part_2 b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB bucketmapjoin_tmp_result)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-4 depends on stages: Stage-1 , consists of Stage-3, Stage-2 + Stage-3 + Stage-0 depends on stages: Stage-3, Stage-2 + Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + b + TableScan + alias: b + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col1, _col3 + Position of Big Table: 1 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col3 + type: string + outputColumnNames: _col0, _col1, _col3 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col3 + type: string + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-05-00_396_5642166507126495362/10002 + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.types string:string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + name bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266609842 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: bucketmapjoin_tmp_result + TotalFiles: 1 + MultiFileSpray: false + Local Work: + Map Reduce Local Work + Alias -> Map Local Tables: + a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + a + TableScan + alias: a + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col1, _col3 + Position of Big Table: 1 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col3 + type: string + outputColumnNames: _col0, _col1, _col3 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col3 + type: string + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-05-00_396_5642166507126495362/10002 + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.types string:string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + name bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266609842 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: bucketmapjoin_tmp_result + TotalFiles: 1 + MultiFileSpray: false + Bucket Mapjoin Context: + Alias Bucket Base File Name Mapping: + a {srcbucket22.txt=[srcbucket20.txt], srcbucket23.txt=[srcbucket21.txt], ds=2008-04-09/srcbucket22.txt=[srcbucket20.txt], ds=2008-04-09/srcbucket23.txt=[srcbucket21.txt]} + Alias Bucket File Name Mapping: + a {file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket22.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket23.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-09/srcbucket22.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-09/srcbucket23.txt=[file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt]} + Needs Tagging: false + Path -> Alias: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08 [b] + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-09 [b] + Path -> Partition: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08 + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.types int:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2 + name srcbucket_mapjoin_part_2 + partition_columns ds + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266609841 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.types int:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2 + name srcbucket_mapjoin_part_2 + partition_columns ds + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266609841 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: srcbucket_mapjoin_part_2 + name: srcbucket_mapjoin_part_2 + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-09 + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.types int:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2 + name srcbucket_mapjoin_part_2 + partition_columns ds + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266609841 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.types int:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2 + name srcbucket_mapjoin_part_2 + partition_columns ds + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266609841 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: srcbucket_mapjoin_part_2 + name: srcbucket_mapjoin_part_2 + + Stage: Stage-4 + Conditional Operator + + Stage: Stage-3 + Move Operator + files: + hdfs directory: true + source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-05-00_396_5642166507126495362/10002 + destination: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-05-00_396_5642166507126495362/10000 + + Stage: Stage-0 + Move Operator + tables: + replace: true + source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-05-00_396_5642166507126495362/10000 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.types string:string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + name bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266609842 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: bucketmapjoin_tmp_result + tmp directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-05-00_396_5642166507126495362/10001 + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-05-00_396_5642166507126495362/10002 + Reduce Output Operator + sort order: + Map-reduce partition columns: + expr: rand() + type: double + tag: -1 + value expressions: + expr: key + type: string + expr: value1 + type: string + expr: value2 + type: string + Needs Tagging: false + Path -> Alias: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-05-00_396_5642166507126495362/10002 [file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-05-00_396_5642166507126495362/10002] + Path -> Partition: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-05-00_396_5642166507126495362/10002 + Partition + base file name: 10002 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.types string:string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + name bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266609842 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.types string:string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + name bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266609842 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: bucketmapjoin_tmp_result + name: bucketmapjoin_tmp_result + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-05-00_396_5642166507126495362/10000 + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.types string:string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + name bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266609842 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: bucketmapjoin_tmp_result + TotalFiles: 1 + MultiFileSpray: false + + +PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b +on a.key=b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +PREHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-09 +PREHOOK: Input: default@srcbucket_mapjoin +PREHOOK: Output: default@bucketmapjoin_tmp_result +POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b +on a.key=b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-09 +POSTHOOK: Input: default@srcbucket_mapjoin +POSTHOOK: Output: default@bucketmapjoin_tmp_result +PREHOOK: query: select count(1) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-05-11_755_6348633402873823899/10000 +POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-05-11_755_6348633402873823899/10000 +0 +PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_1 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: default@bucketmapjoin_hash_result_1 +POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_1 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: default@bucketmapjoin_hash_result_1 +PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b +on a.key=b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +PREHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-09 +PREHOOK: Input: default@srcbucket_mapjoin +PREHOOK: Output: default@bucketmapjoin_tmp_result +POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b +on a.key=b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-09 +POSTHOOK: Input: default@srcbucket_mapjoin +POSTHOOK: Output: default@bucketmapjoin_tmp_result +PREHOOK: query: select count(1) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-05-37_737_6565508873469024348/10000 +POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-05-37_737_6565508873469024348/10000 +0 +PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: default@bucketmapjoin_hash_result_2 +POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: default@bucketmapjoin_hash_result_2 +PREHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 +from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_hash_result_2 +PREHOOK: Input: default@bucketmapjoin_hash_result_1 +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-06-08_952_2250288981466343263/10000 +POSTHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 +from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_hash_result_2 +POSTHOOK: Input: default@bucketmapjoin_hash_result_1 +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-19_12-06-08_952_2250288981466343263/10000 +NULL NULL NULL +PREHOOK: query: drop table bucketmapjoin_tmp_result +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table bucketmapjoin_tmp_result +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: default@bucketmapjoin_tmp_result +PREHOOK: query: drop table srcbucket_mapjoin +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table srcbucket_mapjoin +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: drop table srcbucket_mapjoin_part +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table srcbucket_mapjoin_part +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: default@srcbucket_mapjoin_part +PREHOOK: query: drop table srcbucket_mapjoin_part_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table srcbucket_mapjoin_part_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 Index: ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out (revision 0) +++ ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out (revision 0) @@ -0,0 +1,383 @@ +PREHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 3 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 3 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@srcbucket_mapjoin_part +PREHOOK: query: load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: create table bucketmapjoin_tmp_result (key string , value1 string, value2 string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table bucketmapjoin_tmp_result (key string , value1 string, value2 string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@bucketmapjoin_tmp_result +PREHOOK: query: explain extended +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key where b.ds="2008-04-08" +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key where b.ds="2008-04-08" +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF srcbucket_mapjoin a) (TOK_TABREF srcbucket_mapjoin_part b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB bucketmapjoin_tmp_result)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL b) ds) "2008-04-08")))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-4 depends on stages: Stage-1 , consists of Stage-3, Stage-2 + Stage-3 + Stage-0 depends on stages: Stage-3, Stage-2 + Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a + TableScan + alias: a + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} {ds} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col1, _col3, _col4 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + outputColumnNames: _col0, _col1, _col3, _col4 + Filter Operator + isSamplingPred: false + predicate: + expr: (_col4 = '2008-04-08') + type: boolean + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col3 + type: string + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-18_19-39-18_019_1221983328675198320/10002 + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.types string:string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + name bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266550758 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: bucketmapjoin_tmp_result + TotalFiles: 1 + MultiFileSpray: false + Local Work: + Map Reduce Local Work + Alias -> Map Local Tables: + b + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + b + TableScan + alias: b + Filter Operator + isSamplingPred: false + predicate: + expr: (ds = '2008-04-08') + type: boolean + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} {ds} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col1, _col3, _col4 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + outputColumnNames: _col0, _col1, _col3, _col4 + Filter Operator + isSamplingPred: false + predicate: + expr: (_col4 = '2008-04-08') + type: boolean + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col3 + type: string + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-18_19-39-18_019_1221983328675198320/10002 + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.types string:string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + name bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266550758 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: bucketmapjoin_tmp_result + TotalFiles: 1 + MultiFileSpray: false + Needs Tagging: false + Path -> Alias: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin [a] + Path -> Partition: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin + Partition + base file name: srcbucket_mapjoin + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.types int:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin + name srcbucket_mapjoin + serialization.ddl struct srcbucket_mapjoin { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266550757 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.types int:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin + name srcbucket_mapjoin + serialization.ddl struct srcbucket_mapjoin { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266550757 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: srcbucket_mapjoin + name: srcbucket_mapjoin + + Stage: Stage-4 + Conditional Operator + + Stage: Stage-3 + Move Operator + files: + hdfs directory: true + source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-18_19-39-18_019_1221983328675198320/10002 + destination: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-18_19-39-18_019_1221983328675198320/10000 + + Stage: Stage-0 + Move Operator + tables: + replace: true + source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-18_19-39-18_019_1221983328675198320/10000 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.types string:string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + name bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266550758 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: bucketmapjoin_tmp_result + tmp directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-18_19-39-18_019_1221983328675198320/10001 + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-18_19-39-18_019_1221983328675198320/10002 + Reduce Output Operator + sort order: + Map-reduce partition columns: + expr: rand() + type: double + tag: -1 + value expressions: + expr: key + type: string + expr: value1 + type: string + expr: value2 + type: string + Needs Tagging: false + Path -> Alias: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-18_19-39-18_019_1221983328675198320/10002 [file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-18_19-39-18_019_1221983328675198320/10002] + Path -> Partition: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-18_19-39-18_019_1221983328675198320/10002 + Partition + base file name: 10002 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.types string:string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + name bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266550758 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.types string:string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + name bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266550758 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: bucketmapjoin_tmp_result + name: bucketmapjoin_tmp_result + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-18_19-39-18_019_1221983328675198320/10000 + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.types string:string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + name bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266550758 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: bucketmapjoin_tmp_result + TotalFiles: 1 + MultiFileSpray: false + + +PREHOOK: query: drop table bucketmapjoin_tmp_result +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table bucketmapjoin_tmp_result +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: default@bucketmapjoin_tmp_result +PREHOOK: query: drop table srcbucket_mapjoin +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table srcbucket_mapjoin +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: drop table srcbucket_mapjoin_part +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table srcbucket_mapjoin_part +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: default@srcbucket_mapjoin_part Index: ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out (revision 0) +++ ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out (revision 0) @@ -0,0 +1,369 @@ +PREHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +PREHOOK: query: load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +PREHOOK: query: load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +PREHOOK: query: load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-09 +PREHOOK: query: load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-09 +PREHOOK: query: create table bucketmapjoin_tmp_result (key string , value1 string, value2 string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table bucketmapjoin_tmp_result (key string , value1 string, value2 string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@bucketmapjoin_tmp_result +PREHOOK: query: explain extended +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b +on a.key=b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b +on a.key=b.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF srcbucket_mapjoin a) (TOK_TABREF srcbucket_mapjoin_part_2 b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB bucketmapjoin_tmp_result)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-4 depends on stages: Stage-1 , consists of Stage-3, Stage-2 + Stage-3 + Stage-0 depends on stages: Stage-3, Stage-2 + Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a + TableScan + alias: a + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col1, _col3 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col3 + type: string + outputColumnNames: _col0, _col1, _col3 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col3 + type: string + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-18_19-39-21_734_2075259571912037089/10002 + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.types string:string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + name bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266550761 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: bucketmapjoin_tmp_result + TotalFiles: 1 + MultiFileSpray: false + Local Work: + Map Reduce Local Work + Alias -> Map Local Tables: + b + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + b + TableScan + alias: b + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col1, _col3 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col3 + type: string + outputColumnNames: _col0, _col1, _col3 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col3 + type: string + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-18_19-39-21_734_2075259571912037089/10002 + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.types string:string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + name bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266550761 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: bucketmapjoin_tmp_result + TotalFiles: 1 + MultiFileSpray: false + Needs Tagging: false + Path -> Alias: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin [a] + Path -> Partition: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin + Partition + base file name: srcbucket_mapjoin + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.types int:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin + name srcbucket_mapjoin + serialization.ddl struct srcbucket_mapjoin { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266550760 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.types int:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/srcbucket_mapjoin + name srcbucket_mapjoin + serialization.ddl struct srcbucket_mapjoin { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266550760 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: srcbucket_mapjoin + name: srcbucket_mapjoin + + Stage: Stage-4 + Conditional Operator + + Stage: Stage-3 + Move Operator + files: + hdfs directory: true + source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-18_19-39-21_734_2075259571912037089/10002 + destination: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-18_19-39-21_734_2075259571912037089/10000 + + Stage: Stage-0 + Move Operator + tables: + replace: true + source: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-18_19-39-21_734_2075259571912037089/10000 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.types string:string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + name bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266550761 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: bucketmapjoin_tmp_result + tmp directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-18_19-39-21_734_2075259571912037089/10001 + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-18_19-39-21_734_2075259571912037089/10002 + Reduce Output Operator + sort order: + Map-reduce partition columns: + expr: rand() + type: double + tag: -1 + value expressions: + expr: key + type: string + expr: value1 + type: string + expr: value2 + type: string + Needs Tagging: false + Path -> Alias: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-18_19-39-21_734_2075259571912037089/10002 [file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-18_19-39-21_734_2075259571912037089/10002] + Path -> Partition: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-18_19-39-21_734_2075259571912037089/10002 + Partition + base file name: 10002 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.types string:string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + name bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266550761 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.types string:string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + name bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266550761 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: bucketmapjoin_tmp_result + name: bucketmapjoin_tmp_result + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + directory: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-18_19-39-21_734_2075259571912037089/10000 + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.types string:string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + name bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1266550761 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: bucketmapjoin_tmp_result + TotalFiles: 1 + MultiFileSpray: false + + +PREHOOK: query: drop table bucketmapjoin_tmp_result +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table bucketmapjoin_tmp_result +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: default@bucketmapjoin_tmp_result +PREHOOK: query: drop table srcbucket_mapjoin +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table srcbucket_mapjoin +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: drop table srcbucket_mapjoin_part_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table srcbucket_mapjoin_part_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: default@srcbucket_mapjoin_part_2