diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/CustomPartitionVertex.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/CustomPartitionVertex.java
index 26afe90faa..ef148d9b05 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/CustomPartitionVertex.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/CustomPartitionVertex.java
@@ -108,14 +108,15 @@ public int compare(InputSplit inp1, InputSplit inp2) {
   private final Multimap<Integer, Integer> bucketToTaskMap = HashMultimap.<Integer, Integer> create();
 
   private final Map<String, Multimap<Integer, InputSplit>> inputToGroupedSplitMap =
-      new HashMap<String, Multimap<Integer, InputSplit>>();
+      new HashMap<>();
 
   private int numInputsAffectingRootInputSpecUpdate = 1;
   private int numInputsSeenSoFar = 0;
   private final Map<String, EdgeManagerPluginDescriptor> emMap = Maps.newHashMap();
   private final List<InputSplit> finalSplits = Lists.newLinkedList();
   private final Map<String, InputSpecUpdate> inputNameInputSpecMap =
-      new HashMap<String, InputSpecUpdate>();
+      new HashMap<>();
+  private Map<String, Integer> inputToBucketMap;
 
   public CustomPartitionVertex(VertexManagerPluginContext context) {
     super(context);
@@ -137,6 +138,7 @@ public void initialize() {
     this.mainWorkName = vertexConf.getInputName();
     this.vertexType = vertexConf.getVertexType();
     this.numInputsAffectingRootInputSpecUpdate = vertexConf.getNumInputs();
+    this.inputToBucketMap = vertexConf.getInputToBucketMap();
   }
 
   @Override
@@ -242,7 +244,7 @@ public void onRootVertexInitialized(String inputName, InputDescriptor inputDescr
     }
 
     Multimap<Integer, InputSplit> bucketToInitialSplitMap =
-        getBucketSplitMapForPath(pathFileSplitsMap);
+        getBucketSplitMapForPath(inputName, pathFileSplitsMap);
 
     try {
       int totalResource = context.getTotalAvailableResource().getMemory();
@@ -532,20 +534,47 @@ private FileSplit getFileSplitFromEvent(InputDataInformationEvent event) throws
   /*
    * This method generates the map of bucket to file splits.
    */
-  private Multimap<Integer, InputSplit> getBucketSplitMapForPath(
+  private Multimap<Integer, InputSplit> getBucketSplitMapForPath(String inputName,
       Map<String, Set<FileSplit>> pathFileSplitsMap) {
-    int bucketNum = 0;
 
     Multimap<Integer, InputSplit> bucketToInitialSplitMap =
-        ArrayListMultimap.<Integer, InputSplit> create();
+        ArrayListMultimap.create();
 
+    boolean fallback = false;
+    List<Integer> bucketIds = new ArrayList<>();
     for (Map.Entry<String, Set<FileSplit>> entry : pathFileSplitsMap.entrySet()) {
-      int bucketId = bucketNum % numBuckets;
+      // Extract the bucketId from pathFileSplitsMap. This is the more accurate
+      // method; however, it may not work in certain cases where bucket files
+      // are named after the files used while loading data. In such a case,
+      // fall back to the old, potentially inaccurate method.
+      // The accepted file names look like 000000_0 and 000001_0_copy_1.
+      String bucketIdStr =
+          Utilities.getBucketFileNameFromPathSubString(entry.getKey());
+      int bucketId = Utilities.getBucketIdFromFile(bucketIdStr);
+      if (bucketId == -1) {
+        fallback = true;
+        LOG.info("Fallback to using older sort based logic to assign "
+            + "buckets to splits.");
+        bucketIds.clear();
+        break;
+      }
+      bucketIds.add(bucketId);
       for (FileSplit fsplit : entry.getValue()) {
         bucketToInitialSplitMap.put(bucketId, fsplit);
       }
-      bucketNum++;
+    }
+
+    int bucketNum = 0;
+    if (fallback) {
+      // This is the old logic, which assumes that the filenames are sorted in
+      // alphanumeric order and mapped to the appropriate bucket number.
+      for (Map.Entry<String, Set<FileSplit>> entry : pathFileSplitsMap.entrySet()) {
+        for (FileSplit fsplit : entry.getValue()) {
+          bucketToInitialSplitMap.put(bucketNum, fsplit);
+        }
+        bucketNum++;
+      }
     }
 
     // this is just for SMB join use-case. The numBuckets would be equal to that of the big table
@@ -553,16 +582,28 @@ private FileSplit getFileSplitFromEvent(InputDataInformationEvent event) throws
     // data from the right buckets to the big table side. For e.g. Big table has 8 buckets and small
     // table has 4 buckets, bucket 0 of small table needs to be sent to bucket 4 of the big table as
     // well.
-    if (bucketNum < numBuckets) {
-      int loopedBucketId = 0;
-      for (; bucketNum < numBuckets; bucketNum++) {
-        for (InputSplit fsplit : bucketToInitialSplitMap.get(loopedBucketId)) {
-          bucketToInitialSplitMap.put(bucketNum, fsplit);
+    if (numInputsAffectingRootInputSpecUpdate != 1 &&
+        inputName.compareTo(mainWorkName) != 0) {
+      // small table
+      int inputNumBuckets = inputToBucketMap.get(inputName);
+      if (fallback && bucketNum != inputNumBuckets) {
+        // The fallback mechanism kicked in, which only works correctly if
+        // there exists a file for each bucket; otherwise it may produce
+        // wrong results. Throw an error.
+        // (Reconstructed: the patch text omitted the statement promised above.)
+        throw new RuntimeException("Number of bucket files (" + bucketNum
+            + ") for input " + inputName + " does not match " + inputNumBuckets);
+      }
+      if (inputNumBuckets < numBuckets) {
+        // Need to send the splits to multiple buckets
+        for (int i = 1; i < numBuckets / inputNumBuckets; i++) {
+          int bucketIdBase = i * inputNumBuckets;
+          for (Integer bucketId : bucketIds) {
+            for (InputSplit fsplit : bucketToInitialSplitMap.get(bucketId)) {
+              bucketToInitialSplitMap.put(bucketIdBase + bucketId, fsplit);
+            }
+          }
         }
-        loopedBucketId++;
       }
     }
-
     return bucketToInitialSplitMap;
   }
 }
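For intuition, the routing rule the replication loop above implements can be run in isolation. The sketch below is illustrative only (the class and method names are not part of the patch) and assumes, as the loop does, that the big table's numBuckets is a multiple of the small table's inputNumBuckets:

import java.util.ArrayList;
import java.util.List;

// Hypothetical sketch: which big-table buckets receive the splits of a
// small-table bucket. Mirrors bucketIdBase = i * inputNumBuckets above.
public class BucketRoutingSketch {
  static List<Integer> targetBuckets(int bucketId, int inputNumBuckets, int numBuckets) {
    List<Integer> targets = new ArrayList<>();
    targets.add(bucketId); // the small-table bucket itself
    for (int i = 1; i < numBuckets / inputNumBuckets; i++) {
      targets.add(i * inputNumBuckets + bucketId); // replicated copy
    }
    return targets;
  }

  public static void main(String[] args) {
    // The example from the comment above: big table 8 buckets, small table 4.
    System.out.println(targetBuckets(0, 4, 8)); // [0, 4]
    System.out.println(targetBuckets(3, 4, 8)); // [3, 7]
  }
}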
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/CustomVertexConfiguration.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/CustomVertexConfiguration.java
index ef5e7edcd6..4301829517 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/CustomVertexConfiguration.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/CustomVertexConfiguration.java
@@ -21,7 +21,10 @@
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
 
+import com.google.common.base.Preconditions;
 import org.apache.hadoop.hive.ql.plan.TezWork.VertexType;
 import org.apache.hadoop.io.Writable;
 
@@ -39,22 +42,24 @@
   private VertexType vertexType = VertexType.AUTO_INITIALIZED_EDGES;
   private int numInputs;
   private String inputName;
+  private Map<String, Integer> inputToBucketMap;
 
   public CustomVertexConfiguration() {
   }
 
   // this is the constructor to use for the Bucket map join case.
   public CustomVertexConfiguration(int numBuckets, VertexType vertexType) {
-    this(numBuckets, vertexType, "", 1);
+    this(numBuckets, vertexType, "", 1, null);
   }
 
   // this is the constructor to use for SMB.
   public CustomVertexConfiguration(int numBuckets, VertexType vertexType, String inputName,
-      int numInputs) {
+      int numInputs, Map<String, Integer> inputToBucketMap) {
     this.numBuckets = numBuckets;
     this.vertexType = vertexType;
    this.numInputs = numInputs;
     this.inputName = inputName;
+    this.inputToBucketMap = inputToBucketMap;
   }
 
   @Override
@@ -63,6 +68,14 @@ public void write(DataOutput out) throws IOException {
     out.writeInt(this.numBuckets);
     out.writeInt(numInputs);
     out.writeUTF(inputName);
+    int sz = inputToBucketMap != null ? inputToBucketMap.size() : 0;
+    out.writeInt(sz);
+    if (sz > 0) {
+      for (Map.Entry<String, Integer> entry : inputToBucketMap.entrySet()) {
+        out.writeUTF(entry.getKey());
+        out.writeInt(entry.getValue());
+      }
+    }
   }
 
   @Override
@@ -71,6 +84,16 @@ public void readFields(DataInput in) throws IOException {
     this.numBuckets = in.readInt();
     this.numInputs = in.readInt();
     this.inputName = in.readUTF();
+    int sz = in.readInt();
+    Preconditions.checkState(sz >= 0);
+    if (sz == 0) {
+      this.inputToBucketMap = null;
+    } else {
+      this.inputToBucketMap = new HashMap<>();
+      for (int i = 0; i < sz; i++) {
+        this.inputToBucketMap.put(in.readUTF(), in.readInt());
+      }
+    }
   }
 
   public int getNumBuckets() {
@@ -88,4 +111,8 @@ public String getInputName() {
   public int getNumInputs() {
     return numInputs;
   }
+
+  public Map<String, Integer> getInputToBucketMap() {
+    return inputToBucketMap;
+  }
 }
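The Writable round trip added above can be checked with Hadoop's DataOutputBuffer and DataInputBuffer. A minimal sketch, assuming hypothetical input names and bucket counts (note the deliberate asymmetry: a null or empty map is written as size 0 and always read back as null):

import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.hive.ql.exec.tez.CustomVertexConfiguration;
import org.apache.hadoop.hive.ql.plan.TezWork.VertexType;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;

public class VertexConfRoundTrip {
  public static void main(String[] args) throws Exception {
    Map<String, Integer> inputToBucketMap = new HashMap<>();
    inputToBucketMap.put("smallTable1", 2); // hypothetical input -> bucket count
    inputToBucketMap.put("smallTable2", 4);

    CustomVertexConfiguration written = new CustomVertexConfiguration(
        8, VertexType.AUTO_INITIALIZED_EDGES, "bigTable", 3, inputToBucketMap);
    DataOutputBuffer out = new DataOutputBuffer();
    written.write(out);

    DataInputBuffer in = new DataInputBuffer();
    in.reset(out.getData(), out.getLength());
    CustomVertexConfiguration read = new CustomVertexConfiguration();
    read.readFields(in);

    if (!inputToBucketMap.equals(read.getInputToBucketMap())) {
      throw new AssertionError("round trip lost the inputToBucketMap");
    }
  }
}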
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
index 9885038588..0e75f6e5e8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
@@ -21,6 +21,7 @@
 import java.util.concurrent.ConcurrentHashMap;
 
 import com.google.common.base.Function;
+import com.google.common.base.Preconditions;
 import com.google.common.collect.Iterators;
 import com.google.common.collect.Lists;
 import javax.security.auth.login.LoginException;
@@ -568,13 +569,26 @@ private Vertex createVertex(JobConf conf, MergeJoinWork mergeJoinWork, FileSyste
           MultiMRInput.createConfigBuilder(conf, HiveInputFormat.class).build());
     }
 
+    // To be populated for SMB joins only for all the small tables
+    Map<String, Integer> inputToBucketMap = new HashMap<>();
+    if (mergeJoinWork.getMergeJoinOperator().getParentOperators().size() == 1
+        && mergeJoinWork.getMergeJoinOperator().getOpTraits() != null) {
+      // This is an SMB join.
+      for (BaseWork work : mapWorkList) {
+        MapWork mw = (MapWork) work;
+        Map<String, Operator<? extends OperatorDesc>> aliasToWork = mw.getAliasToWork();
+        Preconditions.checkState(aliasToWork.size() == 1,
+            "More than 1 alias in SMB mapwork");
+        inputToBucketMap.put(mw.getName(), mw.getWorks().get(0).getOpTraits().getNumBuckets());
+      }
+    }
     VertexManagerPluginDescriptor desc =
         VertexManagerPluginDescriptor.create(CustomPartitionVertex.class.getName());
     // the +1 to the size is because of the main work.
     CustomVertexConfiguration vertexConf =
         new CustomVertexConfiguration(mergeJoinWork.getMergeJoinOperator().getConf()
             .getNumBuckets(), vertexType, mergeJoinWork.getBigTableAlias(),
-            mapWorkList.size() + 1);
+            mapWorkList.size() + 1, inputToBucketMap);
     DataOutputBuffer dob = new DataOutputBuffer();
     vertexConf.write(dob);
     byte[] userPayload = dob.getData();
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
index 54f5bab6de..361976311d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
@@ -23,6 +23,7 @@
 import java.io.Serializable;
 import java.net.URI;
 import java.net.URISyntaxException;
+import java.util.Arrays;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
@@ -160,6 +161,48 @@ private URI initializeFromURI(String fromPath, boolean isLocal) throws IOExcepti
               "source contains directory: " + oneSrc.getPath().toString()));
         }
       }
+      // Do another loop if table is bucketed
+      List<String> bucketCols = table.getBucketCols();
+      if (bucketCols != null && !bucketCols.isEmpty()) {
+        // Hive assumes that the user names the files as per the corresponding
+        // bucket. For example, file names should follow the format 000000_0, 000001_0, etc.
+        // Here the 1st file will belong to bucket 0, the 2nd to bucket 1, and so on.
+        boolean[] bucketArray = new boolean[table.getNumBuckets()];
+        // initialize the array
+        Arrays.fill(bucketArray, false);
+        int numBuckets = table.getNumBuckets();
+
+        for (FileStatus oneSrc : srcs) {
+          String bucketName = oneSrc.getPath().getName();
+
+          // get the bucket id
+          String bucketIdStr =
+              Utilities.getBucketFileNameFromPathSubString(bucketName);
+          int bucketId = Utilities.getBucketIdFromFile(bucketIdStr);
+          LOG.debug("bucket ID for file " + oneSrc.getPath() + " = " + bucketId
+              + " for table " + table.getFullyQualifiedName());
+          if (bucketId == -1) {
+            throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(
+                "The file name is invalid : "
+                + oneSrc.getPath().toString() + " for table "
+                + table.getFullyQualifiedName()));
+          }
+          if (bucketId >= numBuckets) {
+            throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(
+                "The file name corresponds to invalid bucketId : "
+                + oneSrc.getPath().toString())
+                + ". Maximum number of buckets can be " + numBuckets
+                + " for table " + table.getFullyQualifiedName());
+          }
+          if (bucketArray[bucketId]) {
+            throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(
+                "Multiple files for same bucket : " + bucketId
+                + ". Only 1 file per bucket allowed in single load command. To load multiple files for same bucket, use multiple statements for table "
+                + table.getFullyQualifiedName()));
+          }
+          bucketArray[bucketId] = true;
+        }
+      }
     } catch (IOException e) {
       // Has to use full name to make sure it does not conflict with
       // org.apache.commons.lang.StringUtils
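The checks above delegate the file-name parsing to Utilities.getBucketFileNameFromPathSubString and Utilities.getBucketIdFromFile. The intended acceptance rule can be approximated standalone; this is a hedged sketch (the regex is an assumption, not Hive's exact implementation) of how names map to bucket ids and how duplicates are rejected:

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class BucketFileNameCheck {
  // Assumed pattern: 000000_0 or 000001_0_copy_1; group 1 is the bucket id.
  private static final Pattern BUCKET_FILE =
      Pattern.compile("^(\\d+)_\\d+(_copy_\\d+)?$");

  static int bucketIdFromFileName(String name) {
    Matcher m = BUCKET_FILE.matcher(name);
    return m.matches() ? Integer.parseInt(m.group(1)) : -1; // -1 = invalid
  }

  public static void main(String[] args) {
    int numBuckets = 4;
    boolean[] seen = new boolean[numBuckets];
    for (String f : new String[] {"000000_0", "000001_0_copy_1", "data.txt", "000001_0"}) {
      int id = bucketIdFromFileName(f);
      if (id == -1 || id >= numBuckets || seen[id]) {
        System.out.println("reject " + f); // the SemanticException cases above
        continue;
      }
      seen[id] = true;
      System.out.println("accept " + f + " -> bucket " + id);
    }
  }
}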
diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_2.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_2.q
index e5fdcb57e4..b7bd10eed2 100644
--- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_2.q
+++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_2.q
@@ -1,19 +1,21 @@
 set hive.strict.checks.bucketing=false;
 
 set hive.mapred.mode=nonstrict;
--- small 1 part, 4 bucket & big 2 part, 2 bucket
-CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
+-- small 1 part, 2 bucket & big 2 part, 4 bucket
+CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
 load data local inpath '../../data/files/auto_sortmerge_join/small/000000_0' INTO TABLE bucket_small partition(ds='2008-04-08');
 load data local inpath '../../data/files/auto_sortmerge_join/small/000001_0' INTO TABLE bucket_small partition(ds='2008-04-08');
-load data local inpath '../../data/files/auto_sortmerge_join/small/000002_0' INTO TABLE bucket_small partition(ds='2008-04-08');
-load data local inpath '../../data/files/auto_sortmerge_join/small/000003_0' INTO TABLE bucket_small partition(ds='2008-04-08');
 
-CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
 load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big partition(ds='2008-04-08');
 load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_big partition(ds='2008-04-08');
+load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08');
+load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08');
 
 load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big partition(ds='2008-04-09');
 load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_big partition(ds='2008-04-09');
+load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-09');
+load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-09');
 
 set hive.auto.convert.join=true;
 set hive.auto.convert.sortmerge.join=true;
diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_4.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_4.q
index abf09e5534..9f719aebb5 100644
--- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_4.q
+++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_4.q
@@ -1,7 +1,7 @@
 set hive.strict.checks.bucketing=false;
 
 set hive.mapred.mode=nonstrict;
--- small 2 part, 4 bucket & big 1 part, 2 bucket
+-- small 2 part, 4 bucket & big 1 part, 4 bucket
 CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
 load data local inpath '../../data/files/auto_sortmerge_join/small/000000_0' INTO TABLE bucket_small partition(ds='2008-04-08');
 load data local inpath '../../data/files/auto_sortmerge_join/small/000001_0' INTO TABLE bucket_small partition(ds='2008-04-08');
@@ -13,9 +13,11 @@ load data local inpath '../../data/files/auto_sortmerge_join/small/000001_0' INT
 load data local inpath '../../data/files/auto_sortmerge_join/small/000002_0' INTO TABLE bucket_small partition(ds='2008-04-09');
 load data local inpath '../../data/files/auto_sortmerge_join/small/000003_0' INTO TABLE bucket_small partition(ds='2008-04-09');
 
-CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
 load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big partition(ds='2008-04-08');
 load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_big partition(ds='2008-04-08');
+load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08');
+load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08');
 
 set hive.auto.convert.join=true;
 set hive.auto.convert.sortmerge.join=true;
diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_5.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_5.q
index b85c4a7aa3..c107501d0a 100644
--- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_5.q
+++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_5.q
@@ -1,19 +1,19 @@
 set hive.strict.checks.bucketing=false;
 
 set hive.mapred.mode=nonstrict;
--- small no part, 4 bucket & big no part, 2 bucket
+-- small no part, 2 bucket & big no part, 4 bucket
 
 -- SORT_QUERY_RESULTS
 
-CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
+CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
 load data local inpath '../../data/files/auto_sortmerge_join/small/000000_0' INTO TABLE bucket_small;
 load data local inpath '../../data/files/auto_sortmerge_join/small/000001_0' INTO TABLE bucket_small;
-load data local inpath '../../data/files/auto_sortmerge_join/small/000002_0' INTO TABLE bucket_small;
-load data local inpath '../../data/files/auto_sortmerge_join/small/000003_0' INTO TABLE bucket_small;
 
-CREATE TABLE bucket_big (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+CREATE TABLE bucket_big (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
 load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big;
 load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_big;
+load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big;
+load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big;
 
 set hive.auto.convert.sortmerge.join=true;
 set hive.optimize.bucketmapjoin = true;
diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_7.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_7.q
index bd780861e3..a5cc04a97f 100644
--- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_7.q
+++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_7.q
@@ -1,7 +1,7 @@
 set hive.strict.checks.bucketing=false;
 
 set hive.mapred.mode=nonstrict;
--- small 2 part, 4 bucket & big 2 part, 2 bucket
+-- small 2 part, 4 bucket & big 2 part, 4 bucket
 CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
 load data local inpath '../../data/files/auto_sortmerge_join/small/000000_0' INTO TABLE bucket_small partition(ds='2008-04-08');
 load data local inpath '../../data/files/auto_sortmerge_join/small/000001_0' INTO TABLE bucket_small partition(ds='2008-04-08');
@@ -13,12 +13,16 @@ load data local inpath '../../data/files/auto_sortmerge_join/small/000001_0' INT
 load data local inpath '../../data/files/auto_sortmerge_join/small/000002_0' INTO TABLE bucket_small partition(ds='2008-04-09');
 load data local inpath '../../data/files/auto_sortmerge_join/small/000003_0' INTO TABLE bucket_small partition(ds='2008-04-09');
 
-CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
 load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big partition(ds='2008-04-08');
 load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_big partition(ds='2008-04-08');
+load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08');
+load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08');
 
 load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big partition(ds='2008-04-09');
 load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_big partition(ds='2008-04-09');
+load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-09');
+load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-09');
 
 set hive.auto.convert.join=true;
 set hive.auto.convert.sortmerge.join=true;
diff --git a/ql/src/test/results/clientnegative/bucket_mapjoin_mismatch1.q.out b/ql/src/test/results/clientnegative/bucket_mapjoin_mismatch1.q.out
index b9c2e6f827..37dbbf9ff3 100644
--- a/ql/src/test/results/clientnegative/bucket_mapjoin_mismatch1.q.out
+++ b/ql/src/test/results/clientnegative/bucket_mapjoin_mismatch1.q.out
@@ -53,174 +53,4 @@ POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string)
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@srcbucket_mapjoin_part_2
-PREHOOK: query: load data local inpath '../../data/files/bmj/000002_0'
-  INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08')
-PREHOOK: type: LOAD
-#### A masked pattern was here ####
-PREHOOK: Output: default@srcbucket_mapjoin_part_2
-POSTHOOK: query: load data local inpath '../../data/files/bmj/000002_0'
-  INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08')
-POSTHOOK: type: LOAD
-#### A masked pattern was here ####
-POSTHOOK: Output: default@srcbucket_mapjoin_part_2
-POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08
-PREHOOK: query: load data local inpath '../../data/files/bmj/000003_0'
-  INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08')
-PREHOOK: type: LOAD
-#### A masked pattern was here ####
-PREHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08
-POSTHOOK: query: load data local inpath '../../data/files/bmj/000003_0'
-  INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08')
-POSTHOOK: type: LOAD
 #### A masked pattern was here ####
-POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08
-PREHOOK: query: explain
-select a.key, a.value, b.value
-from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
-on a.key=b.key and a.ds="2008-04-08" and b.ds="2008-04-08"
-PREHOOK: type: QUERY
-POSTHOOK: query: explain
-select a.key, a.value, b.value
-from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
-on a.key=b.key and a.ds="2008-04-08" and b.ds="2008-04-08"
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: a
-            Statistics: Num rows: 108 Data size: 42000 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 108 Data size: 42000 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: int), value (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 108 Data size: 42000 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: int)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: int)
-                  Statistics: Num rows: 108 Data size: 42000 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col1 (type: string)
-          TableScan
-            alias: b
-            Statistics: Num rows: 78 Data size: 30620 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 78 Data size: 30620 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: int), value (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 78 Data size: 30620 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: int)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: int)
-                  Statistics: Num rows: 78 Data size: 30620 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col1 (type: string)
-      Reduce Operator Tree:
-        Join Operator
-          condition map:
-               Inner Join 0 to 1
-          keys:
-            0 _col0 (type: int)
-            1 _col0 (type: int)
-          outputColumnNames: _col0, _col1, _col4
-          Statistics: Num rows: 118 Data size: 46200 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: int), _col1 (type: string), _col4 (type: string)
-            outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 118 Data size: 46200 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              Statistics: Num rows: 118 Data size: 46200 Basic stats: COMPLETE Column stats: NONE
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
-PREHOOK: query: explain
-select /*+mapjoin(b)*/ a.key, a.value, b.value
-from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
-on a.key=b.key and a.ds="2008-04-08" and b.ds="2008-04-08"
-PREHOOK: type: QUERY
-POSTHOOK: query: explain
-select /*+mapjoin(b)*/ a.key, a.value, b.value
-from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
-on a.key=b.key and a.ds="2008-04-08" and b.ds="2008-04-08"
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-3 is a root stage
-  Stage-1 depends on stages: Stage-3
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-3
-    Map Reduce Local Work
-      Alias -> Map Local Tables:
-        b
-          Fetch Operator
-            limit: -1
-      Alias -> Map Local Operator Tree:
-        b
-          TableScan
-            alias: b
-            Statistics: Num rows: 102 Data size: 30620 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 102 Data size: 30620 Basic stats: COMPLETE Column stats: NONE
-              HashTable Sink Operator
-                keys:
-                  0 key (type: int)
-                  1 key (type: int)
-
-  Stage: Stage-1
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: a
-            Statistics: Num rows: 140 Data size: 42000 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 140 Data size: 42000 Basic stats: COMPLETE Column stats: NONE
-              Map Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 key (type: int)
-                  1 key (type: int)
-                outputColumnNames: _col0, _col1, _col7
-                Statistics: Num rows: 154 Data size: 46200 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string)
-                  outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 154 Data size: 46200 Basic stats: COMPLETE Column stats: NONE
-                  File Output Operator
-                    compressed: false
-                    Statistics: Num rows: 154 Data size: 46200 Basic stats: COMPLETE Column stats: NONE
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-            Local Work:
-              Map Reduce Local Work
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
-FAILED: SemanticException [Error 10136]: Bucketed mapjoin cannot be performed. This can be due to multiple reasons: . Join columns dont match bucketed columns. . Number of buckets are not a multiple of each other. If you really want to perform the operation, either remove the mapjoin hint from your query or set hive.enforce.bucketmapjoin to false.
diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_2.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_2.q.out
index 5cfc35aa73..dda72115c5 100644
--- a/ql/src/test/results/clientpositive/auto_sortmerge_join_2.q.out
+++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_2.q.out
@@ -1,8 +1,8 @@
-PREHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
+PREHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
 PREHOOK: Output: default@bucket_small
-POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
+POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@bucket_small
@@ -23,27 +23,11 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/sm
 POSTHOOK: type: LOAD
 #### A masked pattern was here ####
 POSTHOOK: Output: default@bucket_small@ds=2008-04-08
-PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000002_0' INTO TABLE bucket_small partition(ds='2008-04-08')
-PREHOOK: type: LOAD
-#### A masked pattern was here ####
-PREHOOK: Output: default@bucket_small@ds=2008-04-08
-POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000002_0' INTO TABLE bucket_small partition(ds='2008-04-08')
-POSTHOOK: type: LOAD
-#### A masked pattern was here ####
-POSTHOOK: Output: default@bucket_small@ds=2008-04-08
-PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000003_0' INTO TABLE bucket_small partition(ds='2008-04-08')
-PREHOOK: type: LOAD
-#### A masked pattern was here ####
-PREHOOK: Output: default@bucket_small@ds=2008-04-08
-POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000003_0' INTO TABLE bucket_small partition(ds='2008-04-08')
-POSTHOOK: type: LOAD
-#### A masked pattern was here ####
-POSTHOOK: Output: default@bucket_small@ds=2008-04-08
-PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
 PREHOOK: Output: default@bucket_big
-POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@bucket_big
@@ -64,6 +48,22 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi
 POSTHOOK: type: LOAD
 #### A masked pattern was here ####
 POSTHOOK: Output: default@bucket_big@ds=2008-04-08
+PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@bucket_big@ds=2008-04-08
+POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@bucket_big@ds=2008-04-08
+PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@bucket_big@ds=2008-04-08
+POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@bucket_big@ds=2008-04-08
 PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big partition(ds='2008-04-09')
 PREHOOK: type: LOAD
 #### A masked pattern was here ####
@@ -81,6 +81,22 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi
 POSTHOOK: type: LOAD
 #### A masked pattern was here ####
 POSTHOOK: Output: default@bucket_big@ds=2008-04-09
+PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-09')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@bucket_big@ds=2008-04-09
+POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-09')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@bucket_big@ds=2008-04-09
+PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-09')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@bucket_big@ds=2008-04-09
+POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-09')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@bucket_big@ds=2008-04-09
 PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key
 PREHOOK: type: QUERY
 POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key
@@ -95,16 +111,16 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
            alias: a
-            Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE
            GatherStats: false
            Filter Operator
              isSamplingPred: false
              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: key (type: string)
                outputColumnNames: _col0
-                Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE
                Sorted Merge Bucket Map Join Operator
                  condition map:
                       Inner Join 0 to 1
@@ -134,7 +150,7 @@ STAGE PLANS:
            partition values:
              ds 2008-04-08
            properties:
-              bucket_count 2
+              bucket_count 4
              bucket_field_name key
              column.name.delimiter ,
              columns key,value
@@ -142,7 +158,7 @@
              columns.types string:string
#### A masked pattern was here ####
              name default.bucket_big
-              numFiles 2
+              numFiles 4
              numRows 0
              partition_columns ds
              partition_columns.types string
@@ -150,7 +166,7 @@
              serialization.ddl struct bucket_big { string key, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              totalSize 2750
+              totalSize 5812
#### A masked pattern was here ####
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
@@ -158,7 +174,7 @@
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                SORTBUCKETCOLSPREFIX TRUE
-                bucket_count 2
+                bucket_count 4
                bucket_field_name key
                column.name.delimiter ,
                columns key,value
@@ -183,7 +199,7 @@ STAGE PLANS:
            partition values:
              ds 2008-04-09
            properties:
-              bucket_count 2
+              bucket_count 4
              bucket_field_name key
              column.name.delimiter ,
              columns key,value
@@ -191,7 +207,7 @@
              columns.types string:string
#### A masked pattern was here ####
              name default.bucket_big
-              numFiles 2
+              numFiles 4
              numRows 0
              partition_columns ds
              partition_columns.types string
@@ -199,7 +215,7 @@
              serialization.ddl struct bucket_big { string key, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              totalSize 2750
+              totalSize 5812
#### A masked pattern was here ####
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
@@ -207,7 +223,7 @@
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                SORTBUCKETCOLSPREFIX TRUE
-                bucket_count 2
+                bucket_count 4
                bucket_field_name key
                column.name.delimiter ,
                columns key,value
@@ -308,7 +324,7 @@ STAGE PLANS:
            partition values:
              ds 2008-04-08
            properties:
-              bucket_count 4
+              bucket_count 2
              bucket_field_name key
              column.name.delimiter ,
              columns key,value
@@ -316,7 +332,7 @@
              columns.types string:string
#### A masked pattern was here ####
              name default.bucket_small
-              numFiles 4
+              numFiles 2
              numRows 0
              partition_columns ds
              partition_columns.types string
@@ -324,7 +340,7 @@
              serialization.ddl struct bucket_small { string key, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              totalSize 226
+              totalSize 114
#### A masked pattern was here ####
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
@@ -332,7 +348,7 @@
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                SORTBUCKETCOLSPREFIX TRUE
-                bucket_count 4
+                bucket_count 2
                bucket_field_name key
                column.name.delimiter ,
                columns key,value
@@ -353,16 +369,16 @@ STAGE PLANS:
        $hdt$_1:b
          TableScan
            alias: b
-            Statistics: Num rows: 4 Data size: 2260 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 2 Data size: 1140 Basic stats: COMPLETE Column stats: NONE
            GatherStats: false
            Filter Operator
              isSamplingPred: false
              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 4 Data size: 2260 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 2 Data size: 1140 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: key (type: string)
                outputColumnNames: _col0
-                Statistics: Num rows: 4 Data size: 2260 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 2 Data size: 1140 Basic stats: COMPLETE Column stats: NONE
                HashTable Sink Operator
                  keys:
                    0 _col0 (type: string)
@@ -374,16 +390,16 @@ Map Operator Tree:
          TableScan
            alias: a
-            Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE
            GatherStats: false
            Filter Operator
              isSamplingPred: false
              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: key (type: string)
                outputColumnNames: _col0
-                Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE
                Map Join Operator
                  condition map:
                       Inner Join 0 to 1
@@ -414,7 +430,7 @@ STAGE PLANS:
            partition values:
              ds 2008-04-08
            properties:
-              bucket_count 2
+              bucket_count 4
              bucket_field_name key
              column.name.delimiter ,
              columns key,value
@@ -422,7 +438,7 @@
              columns.types string:string
#### A masked pattern was here ####
              name default.bucket_big
-              numFiles 2
+              numFiles 4
              numRows 0
              partition_columns ds
              partition_columns.types string
@@ -430,7 +446,7 @@
              serialization.ddl struct bucket_big { string key, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              totalSize 2750
+              totalSize 5812
#### A masked pattern was here ####
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
@@ -438,7 +454,7 @@
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                SORTBUCKETCOLSPREFIX TRUE
-                bucket_count 2
+                bucket_count 4
                bucket_field_name key
                column.name.delimiter ,
                columns key,value
@@ -463,7 +479,7 @@ STAGE PLANS:
            partition values:
              ds 2008-04-09
            properties:
-              bucket_count 2
+              bucket_count 4
              bucket_field_name key
              column.name.delimiter ,
              columns key,value
@@ -471,7 +487,7 @@
              columns.types string:string
#### A masked pattern was here ####
              name default.bucket_big
-              numFiles 2
+              numFiles 4
              numRows 0
              partition_columns ds
              partition_columns.types string
@@ -479,7 +495,7 @@
              serialization.ddl struct bucket_big { string key, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              totalSize 2750
+              totalSize 5812
#### A masked pattern was here ####
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
@@ -487,7 +503,7 @@
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                SORTBUCKETCOLSPREFIX TRUE
-                bucket_count 2
+                bucket_count 4
                bucket_field_name key
                column.name.delimiter ,
                columns key,value
@@ -511,7 +527,7 @@ STAGE PLANS:
            partition values:
              ds 2008-04-08
            properties:
-              bucket_count 4
+              bucket_count 2
              bucket_field_name key
              column.name.delimiter ,
              columns key,value
@@ -519,7 +535,7 @@
              columns.types string:string
#### A masked pattern was here ####
              name default.bucket_small
-              numFiles 4
+              numFiles 2
              numRows 0
              partition_columns ds
              partition_columns.types string
@@ -527,7 +543,7 @@
              serialization.ddl struct bucket_small { string key, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              totalSize 226
+              totalSize 114
#### A masked pattern was here ####
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
@@ -535,7 +551,7 @@
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                SORTBUCKETCOLSPREFIX TRUE
-                bucket_count 4
+                bucket_count 2
                bucket_field_name key
                column.name.delimiter ,
                columns key,value
@@ -597,7 +613,7 @@ STAGE PLANS:
            partition values:
              ds 2008-04-08
            properties:
-              bucket_count 2
+              bucket_count 4
              bucket_field_name key
              column.name.delimiter ,
              columns key,value
@@ -605,7 +621,7 @@
              columns.types string:string
#### A masked pattern was here ####
              name default.bucket_big
-              numFiles 2
+              numFiles 4
              numRows 0
              partition_columns ds
              partition_columns.types string
@@ -613,7 +629,7 @@
              serialization.ddl struct bucket_big { string key, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              totalSize 2750
+              totalSize 5812
#### A masked pattern was here ####
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
@@ -621,7 +637,7 @@
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                SORTBUCKETCOLSPREFIX TRUE
-                bucket_count 2
+                bucket_count 4
                bucket_field_name key
                column.name.delimiter ,
                columns key,value
@@ -645,7 +661,7 @@ STAGE PLANS:
            partition values:
              ds 2008-04-09
            properties:
-              bucket_count 2
+              bucket_count 4
              bucket_field_name key
              column.name.delimiter ,
              columns key,value
@@ -653,7 +669,7 @@
              columns.types string:string
#### A masked pattern was here ####
              name default.bucket_big
-              numFiles 2
+              numFiles 4
              numRows 0
              partition_columns ds
              partition_columns.types string
@@ -661,7 +677,7 @@
              serialization.ddl struct bucket_big { string key, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              totalSize 2750
+              totalSize 5812
#### A masked pattern was here ####
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
@@ -669,7 +685,7 @@
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                SORTBUCKETCOLSPREFIX TRUE
-                bucket_count 2
+                bucket_count 4
                bucket_field_name key
                column.name.delimiter ,
                columns key,value
@@ -690,16 +706,16 @@ STAGE PLANS:
        $hdt$_0:a
          TableScan
            alias: a
-            Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE
            GatherStats: false
            Filter Operator
              isSamplingPred: false
              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: key (type: string)
                outputColumnNames: _col0
-                Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE
                HashTable Sink Operator
                  keys:
                    0 _col0 (type: string)
@@ -711,16 +727,16 @@ Map Operator Tree:
          TableScan
            alias: b
-            Statistics: Num rows: 4 Data size: 2260 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 2 Data size: 1140 Basic stats: COMPLETE Column stats: NONE
            GatherStats: false
            Filter Operator
              isSamplingPred: false
              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 4 Data size: 2260 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 2 Data size: 1140 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: key (type: string)
                outputColumnNames: _col0
-                Statistics: Num rows: 4 Data size: 2260 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 2 Data size: 1140 Basic stats: COMPLETE Column stats: NONE
                Map Join Operator
                  condition map:
                       Inner Join 0 to 1
@@ -751,7 +767,7 @@ STAGE PLANS:
            partition values:
              ds 2008-04-08
            properties:
-              bucket_count 2
+              bucket_count 4
              bucket_field_name key
              column.name.delimiter ,
              columns key,value
@@ -759,7 +775,7 @@
              columns.types string:string
#### A masked pattern was here ####
              name default.bucket_big
-              numFiles 2
+              numFiles 4
              numRows 0
              partition_columns ds
              partition_columns.types string
@@ -767,7 +783,7 @@
              serialization.ddl struct bucket_big { string key, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              totalSize 2750
+              totalSize 5812
#### A masked pattern was here ####
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
@@ -775,7 +791,7 @@
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                SORTBUCKETCOLSPREFIX TRUE
-                bucket_count 2
+                bucket_count 4
                bucket_field_name key
                column.name.delimiter ,
                columns key,value
@@ -800,7 +816,7 @@ STAGE PLANS:
            partition values:
              ds 2008-04-09
            properties:
-              bucket_count 2
+              bucket_count 4
              bucket_field_name key
              column.name.delimiter ,
              columns key,value
@@ -808,7 +824,7 @@
              columns.types string:string
#### A masked pattern was here ####
              name default.bucket_big
-              numFiles 2
+              numFiles 4
              numRows 0
              partition_columns ds
              partition_columns.types string
@@ -816,7 +832,7 @@
              serialization.ddl struct bucket_big { string key, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              totalSize 2750
+              totalSize 5812
#### A masked pattern was here ####
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
@@ -824,7 +840,7 @@
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                SORTBUCKETCOLSPREFIX TRUE
-                bucket_count 2
+                bucket_count 4
                bucket_field_name key
                column.name.delimiter ,
                columns key,value
@@ -848,7 +864,7 @@ STAGE PLANS:
            partition values:
              ds 2008-04-08
            properties:
-              bucket_count 4
+              bucket_count 2
              bucket_field_name key
              column.name.delimiter ,
              columns key,value
@@ -856,7 +872,7 @@
              columns.types string:string
#### A masked pattern was here ####
              name default.bucket_small
-              numFiles 4
+              numFiles 2
              numRows 0
              partition_columns ds
              partition_columns.types string
@@ -864,7 +880,7 @@
              serialization.ddl struct bucket_small { string key, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              totalSize 226
+              totalSize 114
#### A masked pattern was here ####
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
@@ -872,7 +888,7 @@
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                SORTBUCKETCOLSPREFIX TRUE
-                bucket_count 4
+                bucket_count 2
                bucket_field_name key
                column.name.delimiter ,
                columns key,value
@@ -924,16 +940,16 @@ STAGE PLANS:
      Map Operator Tree:
          TableScan
            alias: a
-            Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE
            GatherStats: false
            Filter Operator
              isSamplingPred: false
              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: key (type: string)
                outputColumnNames: _col0
-                Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE
                Sorted Merge Bucket Map Join Operator
                  condition map:
                       Inner Join 0 to 1
@@ -963,7 +979,7 @@ STAGE PLANS:
            partition values:
              ds 2008-04-08
            properties:
-              bucket_count 2
+              bucket_count 4
              bucket_field_name key
              column.name.delimiter ,
              columns key,value
@@ -971,7 +987,7 @@
              columns.types string:string
#### A masked pattern was here ####
              name default.bucket_big
-              numFiles 2
+              numFiles 4
              numRows 0
              partition_columns ds
              partition_columns.types string
@@ -979,7 +995,7 @@
              serialization.ddl struct bucket_big { string key, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              totalSize 2750
+              totalSize 5812
#### A masked pattern was here ####
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
@@ -987,7 +1003,7 @@
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                SORTBUCKETCOLSPREFIX TRUE
-                bucket_count 2
+                bucket_count 4
                bucket_field_name key
                column.name.delimiter ,
                columns key,value
@@ -1012,7 +1028,7 @@ STAGE PLANS:
            partition values:
              ds 2008-04-09
            properties:
-              bucket_count 2
+              bucket_count 4
              bucket_field_name key
              column.name.delimiter ,
              columns key,value
@@ -1020,7 +1036,7 @@
              columns.types string:string
#### A masked pattern was here ####
              name default.bucket_big
-              numFiles 2
+              numFiles 4
              numRows 0
              partition_columns ds
              partition_columns.types string
@@ -1028,7 +1044,7 @@
              serialization.ddl struct bucket_big { string key, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              totalSize 2750
+              totalSize 5812
#### A masked pattern was here ####
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
@@ -1036,7 +1052,7 @@
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                SORTBUCKETCOLSPREFIX TRUE
-                bucket_count 2
+                bucket_count 4
                bucket_field_name key
                column.name.delimiter ,
                columns key,value
diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_4.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_4.q.out
index 0d586fd26b..b54c574358 100644
--- a/ql/src/test/results/clientpositive/auto_sortmerge_join_4.q.out
+++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_4.q.out
@@ -72,11 +72,11 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/sm
 POSTHOOK: type: LOAD
 #### A masked pattern was here ####
 POSTHOOK: Output: default@bucket_small@ds=2008-04-09
-PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
 PREHOOK: Output: default@bucket_big
-POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@bucket_big
@@ -97,6 +97,22 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi
 POSTHOOK: type: LOAD
 #### A masked pattern was here ####
 POSTHOOK: Output: default@bucket_big@ds=2008-04-08
+PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@bucket_big@ds=2008-04-08
+POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@bucket_big@ds=2008-04-08
+PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@bucket_big@ds=2008-04-08
+POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@bucket_big@ds=2008-04-08
 PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key
 PREHOOK: type: QUERY
 POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key
@@ -111,16 +127,16 @@ STAGE PLANS:
      Map Operator Tree:
          TableScan
            alias: b
-            Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE
            GatherStats: false
            Filter Operator
              isSamplingPred: false
              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: key (type: string)
                outputColumnNames: _col0
-                Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE
                Sorted Merge Bucket Map Join Operator
                  condition map:
                       Inner Join 0 to 1
@@ -150,7 +166,7 @@ STAGE PLANS:
            partition values:
              ds 2008-04-08
            properties:
-              bucket_count 2
+              bucket_count 4
              bucket_field_name key
              column.name.delimiter ,
              columns key,value
@@ -158,7 +174,7 @@
              columns.types string:string
#### A masked pattern was here ####
              name default.bucket_big
-              numFiles 2
+              numFiles 4
              numRows 0
              partition_columns ds
              partition_columns.types string
@@ -166,7 +182,7 @@
              serialization.ddl struct bucket_big { string key, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              totalSize 2750
+              totalSize 5812
#### A masked pattern was here ####
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
@@ -174,7 +190,7 @@
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                SORTBUCKETCOLSPREFIX TRUE
-                bucket_count 2
+                bucket_count 4
                bucket_field_name key
                column.name.delimiter ,
                columns key,value
@@ -243,7 +259,7 @@ POSTHOOK: Input: default@bucket_small
 POSTHOOK: Input: default@bucket_small@ds=2008-04-08
 POSTHOOK: Input: default@bucket_small@ds=2008-04-09
#### A masked pattern was here ####
-38
+78
 PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key
 PREHOOK: type: QUERY
 POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key
@@ -258,16 +274,16 @@ Map Operator Tree:
          TableScan
            alias: a
-            Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE
            GatherStats: false
            Filter Operator
              isSamplingPred: false
              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: key (type: string)
                outputColumnNames: _col0
-                Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE
                Sorted Merge Bucket Map Join Operator
                  condition map:
                       Inner Join 0 to 1
@@ -297,7 +313,7 @@ STAGE PLANS:
            partition values:
              ds 2008-04-08
            properties:
-              bucket_count 2
+              bucket_count 4
              bucket_field_name key
              column.name.delimiter ,
              columns key,value
@@ -305,7 +321,7 @@
              columns.types string:string
#### A masked pattern was here ####
              name default.bucket_big
-              numFiles 2
+              numFiles 4
              numRows 0
              partition_columns ds
              partition_columns.types string
@@ -313,7 +329,7 @@
              serialization.ddl struct bucket_big { string key, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              totalSize 2750
+              totalSize 5812
#### A masked pattern was here ####
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
@@ -321,7 +337,7 @@
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                SORTBUCKETCOLSPREFIX TRUE
-                bucket_count 2
+                bucket_count 4
                bucket_field_name key
                column.name.delimiter ,
                columns key,value
@@ -390,7 +406,7 @@ POSTHOOK: Input: default@bucket_small
 POSTHOOK: Input: default@bucket_small@ds=2008-04-08
 POSTHOOK: Input: default@bucket_small@ds=2008-04-09
#### A masked pattern was here ####
-38
+78
 PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key
 PREHOOK: type: QUERY
 POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key
 POSTHOOK: type: QUERY
@@ -534,16 +550,16 @@ STAGE PLANS:
      Map Operator Tree:
          TableScan
            alias: a
-            Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE
            GatherStats: false
            Filter Operator
              isSamplingPred: false
              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: key (type: string)
                outputColumnNames: _col0
-                Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE
                Map Join Operator
                  condition map:
                       Inner Join 0 to 1
@@ -574,7 +590,7 @@ STAGE PLANS:
            partition values:
              ds 2008-04-08
            properties:
-              bucket_count 2
+              bucket_count 4
              bucket_field_name key
              column.name.delimiter ,
              columns key,value
@@ -582,7 +598,7 @@
              columns.types string:string
#### A masked pattern was here ####
              name default.bucket_big
-              numFiles 2
+              numFiles 4
              numRows 0
              partition_columns ds
              partition_columns.types string
@@ -590,7 +606,7 @@
              serialization.ddl struct bucket_big { string key, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              totalSize 2750
+              totalSize 5812
#### A masked pattern was here ####
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
@@ -598,7 +614,7 @@
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                SORTBUCKETCOLSPREFIX TRUE
-                bucket_count 2
+                bucket_count 4
                bucket_field_name key
                column.name.delimiter ,
                columns key,value
@@ -755,7 +771,7 @@ STAGE PLANS:
            partition values:
              ds 2008-04-08
            properties:
-              bucket_count 2
+              bucket_count 4
              bucket_field_name key
              column.name.delimiter ,
              columns key,value
@@ -763,7 +779,7 @@
              columns.types string:string
#### A masked pattern was here ####
              name default.bucket_big
-              numFiles 2
+              numFiles 4
              numRows 0
              partition_columns ds
              partition_columns.types string
@@ -771,7 +787,7 @@
              serialization.ddl struct bucket_big { string key, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              totalSize 2750
+              totalSize 5812
#### A masked pattern was here ####
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
@@ -779,7 +795,7 @@
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                SORTBUCKETCOLSPREFIX TRUE
-                bucket_count 2
+                bucket_count 4
                bucket_field_name key
                column.name.delimiter ,
                columns key,value
@@ -800,16 +816,16 @@ STAGE PLANS:
        $hdt$_0:a
          TableScan
            alias: a
-            Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE
            GatherStats: false
            Filter Operator
              isSamplingPred: false
              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: key (type: string)
                outputColumnNames: _col0
-                Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE
                HashTable Sink Operator
                  keys:
                    0 _col0 (type: string)
@@ -861,7 +877,7 @@ STAGE PLANS:
            partition values:
              ds 2008-04-08
            properties:
-              bucket_count 2
+              bucket_count 4
              bucket_field_name key
              column.name.delimiter ,
              columns key,value
@@ -869,7 +885,7 @@
              columns.types string:string
#### A masked pattern was here ####
              name default.bucket_big
-              numFiles 2
+              numFiles 4
              numRows 0
              partition_columns ds
              partition_columns.types string
@@ -877,7 +893,7 @@
              serialization.ddl struct bucket_big { string key, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              totalSize 2750
+              totalSize 5812
#### A masked pattern was here ####
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
@@ -885,7 +901,7 @@
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                SORTBUCKETCOLSPREFIX TRUE
-                bucket_count 2
+                bucket_count 4
                bucket_field_name key
                column.name.delimiter ,
                columns key,value
@@ -1034,16 +1050,16 @@ STAGE PLANS:
      Map Operator Tree:
          TableScan
            alias: a
-            Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE
            GatherStats: false
            Filter Operator
              isSamplingPred: false
              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column
stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -1073,7 +1089,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -1081,7 +1097,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -1089,7 +1105,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -1097,7 +1113,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -1166,4 +1182,4 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -38 +78 diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_5.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_5.q.out index 45704d1253..451c3b3353 100644 --- a/ql/src/test/results/clientpositive/auto_sortmerge_join_5.q.out +++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_5.q.out @@ -1,8 +1,8 @@ -PREHOOK: query: CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_small -POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_small @@ -22,27 +22,11 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/sm POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_small -PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000002_0' INTO TABLE bucket_small -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@bucket_small -POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000002_0' INTO TABLE bucket_small -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@bucket_small -PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000003_0' INTO TABLE bucket_small -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@bucket_small -POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000003_0' INTO TABLE bucket_small -POSTHOOK: 
type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@bucket_small -PREHOOK: query: CREATE TABLE bucket_big (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE bucket_big (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_big -POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_big @@ -62,6 +46,22 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key @@ -76,16 +76,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -114,7 +114,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -122,13 +122,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was 
here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -136,7 +136,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -144,13 +144,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big @@ -216,16 +216,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -254,7 +254,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -262,13 +262,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -276,7 +276,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -284,13 +284,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big @@ -369,16 +369,16 @@ STAGE PLANS: $hdt$_1:b TableScan alias: b - Statistics: Num rows: 1 Data size: 2260 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1140 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 2260 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1140 Basic stats: COMPLETE Column stats: NONE Select Operator 
expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 2260 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1140 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 _col0 (type: string) @@ -390,16 +390,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -429,7 +429,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -437,13 +437,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -451,7 +451,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -459,13 +459,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big @@ -475,7 +475,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -492,7 +492,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -500,13 +500,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_small - numFiles 4 + numFiles 2 numRows 0 rawDataSize 0 serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_small @@ -551,16 +551,16 @@ STAGE PLANS: $hdt$_0:a 
TableScan alias: a - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 _col0 (type: string) @@ -572,16 +572,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 1 Data size: 2260 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1140 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 2260 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1140 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 2260 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1140 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -611,7 +611,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -619,13 +619,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -633,7 +633,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -641,13 +641,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big @@ -657,7 +657,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -674,7 +674,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -682,13 +682,13 @@ STAGE PLANS: columns.types string:string #### A 
masked pattern was here #### name default.bucket_small - numFiles 4 + numFiles 2 numRows 0 rawDataSize 0 serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_small @@ -728,16 +728,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -766,7 +766,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -774,13 +774,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -788,7 +788,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -796,13 +796,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_7.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_7.q.out index 1959075912..f335142360 100644 --- a/ql/src/test/results/clientpositive/auto_sortmerge_join_7.q.out +++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_7.q.out @@ -72,11 +72,11 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/sm POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_small@ds=2008-04-09 -PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: 
Output: database:default PREHOOK: Output: default@bucket_big -POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_big @@ -97,6 +97,22 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big partition(ds='2008-04-09') PREHOOK: type: LOAD #### A masked pattern was here #### @@ -114,6 +130,22 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-09 PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key @@ -128,16 +160,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 
Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -167,7 +199,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -175,7 +207,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -183,7 +215,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -191,7 +223,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -216,7 +248,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -224,7 +256,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -232,7 +264,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -240,7 +272,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -312,7 +344,7 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -76 +156 PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key @@ -327,16 +359,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - 
Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -366,7 +398,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -374,7 +406,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -382,7 +414,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -390,7 +422,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -415,7 +447,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -423,7 +455,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -431,7 +463,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -439,7 +471,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -511,7 +543,7 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -76 +156 PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key @@ -655,16 +687,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -695,7 +727,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns 
key,value @@ -703,7 +735,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -711,7 +743,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -719,7 +751,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -744,7 +776,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -752,7 +784,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -760,7 +792,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -768,7 +800,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -926,7 +958,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -934,7 +966,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -942,7 +974,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -950,7 +982,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -974,7 +1006,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -982,7 +1014,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -990,7 +1022,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -998,7 +1030,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 
bucket_field_name key column.name.delimiter , columns key,value @@ -1019,16 +1051,16 @@ STAGE PLANS: $hdt$_0:a TableScan alias: a - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 _col0 (type: string) @@ -1080,7 +1112,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -1088,7 +1120,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -1096,7 +1128,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -1104,7 +1136,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -1129,7 +1161,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -1137,7 +1169,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -1145,7 +1177,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -1153,7 +1185,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -1302,16 +1334,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Sorted 
Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -1341,7 +1373,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -1349,7 +1381,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -1357,7 +1389,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -1365,7 +1397,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -1390,7 +1422,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -1398,7 +1430,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -1406,7 +1438,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -1414,7 +1446,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -1486,4 +1518,4 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -76 +156 diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_2.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_2.q.out index 054b0d00be..d4472cf2a0 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_2.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_2.q.out @@ -1,8 +1,8 @@ -PREHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_small -POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_small @@ -23,27 +23,11 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/sm POSTHOOK: type: LOAD #### A masked pattern was here #### 
POSTHOOK: Output: default@bucket_small@ds=2008-04-08 -PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000002_0' INTO TABLE bucket_small partition(ds='2008-04-08') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@bucket_small@ds=2008-04-08 -POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000002_0' INTO TABLE bucket_small partition(ds='2008-04-08') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@bucket_small@ds=2008-04-08 -PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000003_0' INTO TABLE bucket_small partition(ds='2008-04-08') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@bucket_small@ds=2008-04-08 -POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000003_0' INTO TABLE bucket_small partition(ds='2008-04-08') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@bucket_small@ds=2008-04-08 -PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_big -POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_big @@ -64,6 +48,22 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big partition(ds='2008-04-09') PREHOOK: type: LOAD #### A masked pattern was here #### @@ -81,6 +81,22 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: 
default@bucket_big@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-09 PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key @@ -101,16 +117,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 4 Data size: 2996 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 1508 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 4 Data size: 2996 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 1508 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 2996 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 1508 Basic stats: COMPLETE Column stats: NONE Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -121,7 +137,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -129,7 +145,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_small - numFiles 4 + numFiles 2 numRows 0 partition_columns ds partition_columns.types string @@ -137,7 +153,7 @@ STAGE PLANS: serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -145,7 +161,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -167,16 +183,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 112 Data size: 74872 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 158376 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 107 Data size: 71529 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 228 Data size: 150457 Basic stats: COMPLETE Column stats: NONE Select 
Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 107 Data size: 71529 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 228 Data size: 150457 Basic stats: COMPLETE Column stats: NONE Merge Join Operator condition map: Inner Join 0 to 1 @@ -184,7 +200,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 0 - Statistics: Num rows: 117 Data size: 78681 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 165502 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -208,7 +224,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -216,7 +232,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -224,7 +240,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -232,7 +248,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -256,7 +272,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -264,7 +280,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -272,7 +288,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -280,7 +296,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -375,16 +391,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 4 Data size: 2996 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 1508 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 4 Data size: 2996 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 1508 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 2996 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 1508 Basic stats: COMPLETE Column stats: NONE Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -395,7 +411,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -403,7 +419,7 @@ STAGE PLANS: columns.types 
string:string #### A masked pattern was here #### name default.bucket_small - numFiles 4 + numFiles 2 numRows 0 partition_columns ds partition_columns.types string @@ -411,7 +427,7 @@ STAGE PLANS: serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -419,7 +435,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -441,16 +457,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 112 Data size: 74872 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 158376 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 107 Data size: 71529 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 228 Data size: 150457 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 107 Data size: 71529 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 228 Data size: 150457 Basic stats: COMPLETE Column stats: NONE Merge Join Operator condition map: Inner Join 0 to 1 @@ -458,7 +474,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 0 - Statistics: Num rows: 117 Data size: 78681 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 165502 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -482,7 +498,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -490,7 +506,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -498,7 +514,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -506,7 +522,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -530,7 +546,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -538,7 +554,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -546,7 +562,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -554,7 +570,7 @@ STAGE PLANS: output 
format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_4.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_4.q.out index 95d329862c..5cd5d798bc 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_4.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_4.q.out @@ -72,11 +72,11 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/sm POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_small@ds=2008-04-09 -PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_big -POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_big @@ -97,6 +97,22 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key @@ -232,16 +248,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 56 Data size: 37620 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 79280 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 54 Data size: 36276 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 114 Data size: 75316 Basic stats: COMPLETE Column 
stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 54 Data size: 36276 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 114 Data size: 75316 Basic stats: COMPLETE Column stats: NONE Merge Join Operator condition map: Inner Join 0 to 1 @@ -249,7 +265,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 1 - Statistics: Num rows: 59 Data size: 39903 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 125 Data size: 82847 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -273,7 +289,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -281,7 +297,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -289,7 +305,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -297,7 +313,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -370,7 +386,7 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -38 +78 PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key @@ -506,16 +522,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 56 Data size: 37620 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 79280 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 54 Data size: 36276 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 114 Data size: 75316 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 54 Data size: 36276 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 114 Data size: 75316 Basic stats: COMPLETE Column stats: NONE Merge Join Operator condition map: Inner Join 0 to 1 @@ -523,7 +539,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 0 - Statistics: Num rows: 59 Data size: 39903 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 125 Data size: 82847 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -547,7 +563,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -555,7 +571,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -563,7 +579,7 
@@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -571,7 +587,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -644,7 +660,7 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -38 +78 PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key @@ -780,16 +796,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 56 Data size: 37620 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 79280 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 54 Data size: 36276 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 114 Data size: 75316 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 54 Data size: 36276 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 114 Data size: 75316 Basic stats: COMPLETE Column stats: NONE Merge Join Operator condition map: Inner Join 0 to 1 @@ -797,7 +813,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 0 - Statistics: Num rows: 59 Data size: 39903 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 125 Data size: 82847 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -821,7 +837,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -829,7 +845,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -837,7 +853,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -845,7 +861,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -918,4 +934,4 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -38 +78 diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_5.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_5.q.out index e711715aa5..a18f4b21fc 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_5.q.out +++ 
b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_5.q.out @@ -1,8 +1,8 @@ -PREHOOK: query: CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_small -POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_small @@ -22,27 +22,11 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/sm POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_small -PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000002_0' INTO TABLE bucket_small -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@bucket_small -POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000002_0' INTO TABLE bucket_small -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@bucket_small -PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000003_0' INTO TABLE bucket_small -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@bucket_small -POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000003_0' INTO TABLE bucket_small -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@bucket_small -PREHOOK: query: CREATE TABLE bucket_big (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE bucket_big (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_big -POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_big @@ -62,6 +46,22 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big +POSTHOOK: query: load data local inpath 
'../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key @@ -101,7 +101,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -109,13 +109,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_small - numFiles 4 + numFiles 2 numRows 0 rawDataSize 0 serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -123,7 +123,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -131,13 +131,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_small - numFiles 4 + numFiles 2 numRows 0 rawDataSize 0 serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_small @@ -187,7 +187,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -195,13 +195,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -209,7 +209,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -217,13 +217,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big @@ -318,7 +318,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -326,13 +326,13 @@ STAGE PLANS: columns.types 
string:string #### A masked pattern was here #### name default.bucket_small - numFiles 4 + numFiles 2 numRows 0 rawDataSize 0 serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -340,7 +340,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -348,13 +348,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_small - numFiles 4 + numFiles 2 numRows 0 rawDataSize 0 serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_small @@ -404,7 +404,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -412,13 +412,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -426,7 +426,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -434,13 +434,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big @@ -562,7 +562,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -570,13 +570,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -584,7 +584,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -592,13 +592,13 @@ STAGE PLANS: 
columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big @@ -639,7 +639,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -647,13 +647,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_small - numFiles 4 + numFiles 2 numRows 0 rawDataSize 0 serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -661,7 +661,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -669,13 +669,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_small - numFiles 4 + numFiles 2 numRows 0 rawDataSize 0 serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_small diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_7.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_7.q.out index 53c685cb11..fdea211fa4 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_7.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_7.q.out @@ -72,11 +72,11 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/sm POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_small@ds=2008-04-09 -PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_big -POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_big @@ -97,6 +97,22 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath 
'../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big partition(ds='2008-04-09') PREHOOK: type: LOAD #### A masked pattern was here #### @@ -114,6 +130,22 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-09 PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key @@ -249,16 +281,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 112 Data size: 74872 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 158376 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 107 Data size: 71529 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 228 Data size: 150457 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 107 Data size: 71529 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 228 Data size: 150457 Basic stats: COMPLETE Column stats: NONE Merge Join Operator condition map: Inner Join 0 to 1 @@ -266,7 +298,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 1 - Statistics: Num rows: 
117 Data size: 78681 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 165502 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -290,7 +322,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -298,7 +330,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -306,7 +338,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -314,7 +346,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -338,7 +370,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -346,7 +378,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -354,7 +386,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -362,7 +394,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -438,7 +470,7 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -76 +156 PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key @@ -574,16 +606,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 112 Data size: 74872 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 158376 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 107 Data size: 71529 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 228 Data size: 150457 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 107 Data size: 71529 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 228 Data size: 150457 Basic stats: COMPLETE Column stats: NONE Merge Join Operator condition map: Inner Join 0 to 1 @@ -591,7 +623,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 0 - Statistics: Num rows: 117 Data size: 78681 Basic stats: COMPLETE Column stats: NONE + Statistics: Num 
rows: 250 Data size: 165502 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -615,7 +647,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -623,7 +655,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -631,7 +663,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -639,7 +671,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -663,7 +695,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -671,7 +703,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -679,7 +711,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -687,7 +719,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -763,7 +795,7 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -76 +156 PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key @@ -899,16 +931,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 112 Data size: 74872 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 158376 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 107 Data size: 71529 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 228 Data size: 150457 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 107 Data size: 71529 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 228 Data size: 150457 Basic stats: COMPLETE Column stats: NONE Merge Join Operator condition map: Inner Join 0 to 1 @@ -916,7 +948,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 0 - Statistics: Num rows: 117 Data size: 78681 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 165502 Basic stats: COMPLETE Column stats: NONE Group By 
Operator aggregations: count() mode: hash @@ -940,7 +972,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -948,7 +980,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -956,7 +988,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -964,7 +996,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -988,7 +1020,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -996,7 +1028,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -1004,7 +1036,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -1012,7 +1044,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -1088,4 +1120,4 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -76 +156 diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_2.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_2.q.out index 8cfa113794..117ff4aecc 100644 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_2.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_2.q.out @@ -1,8 +1,8 @@ -PREHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_small -POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_small @@ -23,27 +23,11 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/sm POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: 
default@bucket_small@ds=2008-04-08 -PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000002_0' INTO TABLE bucket_small partition(ds='2008-04-08') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@bucket_small@ds=2008-04-08 -POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000002_0' INTO TABLE bucket_small partition(ds='2008-04-08') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@bucket_small@ds=2008-04-08 -PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000003_0' INTO TABLE bucket_small partition(ds='2008-04-08') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@bucket_small@ds=2008-04-08 -POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000003_0' INTO TABLE bucket_small partition(ds='2008-04-08') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@bucket_small@ds=2008-04-08 -PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_big -POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_big @@ -64,6 +48,22 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big partition(ds='2008-04-09') PREHOOK: type: LOAD #### A masked pattern was here #### @@ -81,6 +81,22 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big@ds=2008-04-09 +PREHOOK: query: 
load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-09 PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key @@ -100,16 +116,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -117,7 +133,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 0 - Statistics: Num rows: 123 Data size: 60500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 264 Data size: 127864 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Group By Operator aggregations: count() @@ -142,7 +158,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -150,7 +166,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -158,7 +174,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -166,7 +182,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -191,7 +207,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -199,7 +215,7 @@ STAGE PLANS: 
columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -207,7 +223,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -215,7 +231,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -308,16 +324,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -325,7 +341,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 0 - Statistics: Num rows: 123 Data size: 60500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 264 Data size: 127864 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Group By Operator aggregations: count() @@ -350,7 +366,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -358,7 +374,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -366,7 +382,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -374,7 +390,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -399,7 +415,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -407,7 +423,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -415,7 +431,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe 
@@ -423,7 +439,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_4.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_4.q.out index fce5e0cfc4..aff5a0d242 100644 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_4.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_4.q.out @@ -72,11 +72,11 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/sm POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_small@ds=2008-04-09 -PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_big -POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_big @@ -97,6 +97,22 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key @@ -116,16 +132,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data 
size: 58120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -133,7 +149,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 1 - Statistics: Num rows: 61 Data size: 30250 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 132 Data size: 63932 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Group By Operator aggregations: count() @@ -158,7 +174,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -166,7 +182,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -174,7 +190,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -182,7 +198,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -254,7 +270,7 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -38 +78 PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key @@ -274,16 +290,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -291,7 +307,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 0 - Statistics: Num rows: 61 Data size: 30250 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 132 Data size: 63932 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Group By Operator aggregations: count() @@ -316,7 +332,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -324,7 +340,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 
2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -332,7 +348,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -340,7 +356,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -412,7 +428,7 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -38 +78 PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key @@ -432,16 +448,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -449,7 +465,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 0 - Statistics: Num rows: 61 Data size: 30250 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 132 Data size: 63932 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Group By Operator aggregations: count() @@ -474,7 +490,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -482,7 +498,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -490,7 +506,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -498,7 +514,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -570,4 +586,4 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -38 +78 diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_5.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_5.q.out index 
8250eca099..6255dd2819 100644
--- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_5.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_5.q.out
@@ -1,8 +1,8 @@
-PREHOOK: query: CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
+PREHOOK: query: CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
 PREHOOK: Output: default@bucket_small
-POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
+POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@bucket_small
@@ -22,27 +22,11 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/sm
 POSTHOOK: type: LOAD
 #### A masked pattern was here ####
 POSTHOOK: Output: default@bucket_small
-PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000002_0' INTO TABLE bucket_small
-PREHOOK: type: LOAD
-#### A masked pattern was here ####
-PREHOOK: Output: default@bucket_small
-POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000002_0' INTO TABLE bucket_small
-POSTHOOK: type: LOAD
-#### A masked pattern was here ####
-POSTHOOK: Output: default@bucket_small
-PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000003_0' INTO TABLE bucket_small
-PREHOOK: type: LOAD
-#### A masked pattern was here ####
-PREHOOK: Output: default@bucket_small
-POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000003_0' INTO TABLE bucket_small
-POSTHOOK: type: LOAD
-#### A masked pattern was here ####
-POSTHOOK: Output: default@bucket_small
-PREHOOK: query: CREATE TABLE bucket_big (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+PREHOOK: query: CREATE TABLE bucket_big (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
 PREHOOK: Output: default@bucket_big
-POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@bucket_big
@@ -62,6 +46,22 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi
 POSTHOOK: type: LOAD
 #### A masked pattern was here ####
 POSTHOOK: Output: default@bucket_big
+PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@bucket_big
+POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@bucket_big
+PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@bucket_big +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key @@ -81,16 +81,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -98,7 +98,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 1 - Statistics: Num rows: 1 Data size: 2486 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1254 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Group By Operator aggregations: count() @@ -122,7 +122,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -130,13 +130,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -144,7 +144,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -152,13 +152,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big @@ -232,16 +232,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: 
NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -249,7 +249,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 0 - Statistics: Num rows: 1 Data size: 30250 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 63932 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Group By Operator aggregations: count() @@ -273,7 +273,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -281,13 +281,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -295,7 +295,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -303,13 +303,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big @@ -382,16 +382,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 1 Data size: 2260 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1140 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 2260 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1140 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 2260 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1140 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: string) @@ -414,7 +414,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -422,13 +422,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_small - numFiles 4 + numFiles 2 numRows 0 rawDataSize 0 serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 114 #### A masked pattern was here #### serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -436,7 +436,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -444,13 +444,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_small - numFiles 4 + numFiles 2 numRows 0 rawDataSize 0 serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_small @@ -468,16 +468,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -487,7 +487,7 @@ STAGE PLANS: input vertices: 1 Map 3 Position of Big Table: 0 - Statistics: Num rows: 1 Data size: 30250 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 63932 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Group By Operator aggregations: count() @@ -518,7 +518,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -526,13 +526,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -540,7 +540,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -548,13 +548,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_7.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_7.q.out index eb813c1734..ac5cd47fbb 100644 --- 
a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_7.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_7.q.out @@ -72,11 +72,11 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/sm POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_small@ds=2008-04-09 -PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_big -POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_big @@ -97,6 +97,22 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big partition(ds='2008-04-09') PREHOOK: type: LOAD #### A masked pattern was here #### @@ -114,6 +130,22 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: 
default@bucket_big@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-09 PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key @@ -133,16 +165,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -150,7 +182,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 1 - Statistics: Num rows: 123 Data size: 60500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 264 Data size: 127864 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Group By Operator aggregations: count() @@ -175,7 +207,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -183,7 +215,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -191,7 +223,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -199,7 +231,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -224,7 +256,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -232,7 +264,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -240,7 +272,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -248,7 +280,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key 
column.name.delimiter , columns key,value @@ -323,7 +355,7 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -76 +156 PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key @@ -343,16 +375,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -360,7 +392,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 0 - Statistics: Num rows: 123 Data size: 60500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 264 Data size: 127864 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Group By Operator aggregations: count() @@ -385,7 +417,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -393,7 +425,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -401,7 +433,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -409,7 +441,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -434,7 +466,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -442,7 +474,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -450,7 +482,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -458,7 +490,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -533,7 
+565,7 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -76 +156 PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key @@ -553,16 +585,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -570,7 +602,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 0 - Statistics: Num rows: 123 Data size: 60500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 264 Data size: 127864 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Group By Operator aggregations: count() @@ -595,7 +627,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -603,7 +635,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -611,7 +643,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -619,7 +651,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -644,7 +676,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -652,7 +684,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -660,7 +692,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -668,7 +700,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -743,4 +775,4 @@ POSTHOOK: Input: default@bucket_small 
 POSTHOOK: Input: default@bucket_small@ds=2008-04-08
 POSTHOOK: Input: default@bucket_small@ds=2008-04-09
 #### A masked pattern was here ####
-76
+156