diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/CustomPartitionVertex.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/CustomPartitionVertex.java
index 26afe90faa..a2ed301bc3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/CustomPartitionVertex.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/CustomPartitionVertex.java
@@ -108,14 +108,15 @@ public int compare(InputSplit inp1, InputSplit inp2) {
   private final Multimap bucketToTaskMap = HashMultimap. create();
   private final Map> inputToGroupedSplitMap =
-      new HashMap>();
+      new HashMap<>();
   private int numInputsAffectingRootInputSpecUpdate = 1;
   private int numInputsSeenSoFar = 0;
   private final Map emMap = Maps.newHashMap();
   private final List finalSplits = Lists.newLinkedList();
   private final Map inputNameInputSpecMap =
-      new HashMap();
+      new HashMap<>();
+  private Map inputToBucketMap;
 
   public CustomPartitionVertex(VertexManagerPluginContext context) {
     super(context);
@@ -137,6 +138,7 @@ public void initialize() {
     this.mainWorkName = vertexConf.getInputName();
     this.vertexType = vertexConf.getVertexType();
     this.numInputsAffectingRootInputSpecUpdate = vertexConf.getNumInputs();
+    this.inputToBucketMap = vertexConf.getInputToBucketMap();
   }
 
   @Override
@@ -242,7 +244,7 @@ public void onRootVertexInitialized(String inputName, InputDescriptor inputDescr
     }
 
     Multimap bucketToInitialSplitMap =
-        getBucketSplitMapForPath(pathFileSplitsMap);
+        getBucketSplitMapForPath(inputName, pathFileSplitsMap);
 
     try {
       int totalResource = context.getTotalAvailableResource().getMemory();
@@ -532,20 +534,45 @@ private FileSplit getFileSplitFromEvent(InputDataInformationEvent event) throws
   /*
    * This method generates the map of bucket to file splits.
    */
-  private Multimap getBucketSplitMapForPath(
+  private Multimap getBucketSplitMapForPath(String inputName,
       Map> pathFileSplitsMap) {
-    int bucketNum = 0;
     Multimap bucketToInitialSplitMap =
-        ArrayListMultimap. create();
+        ArrayListMultimap.create();
+    boolean fallback = false;
+    List bucketIds = new ArrayList<>();
     for (Map.Entry> entry : pathFileSplitsMap.entrySet()) {
-      int bucketId = bucketNum % numBuckets;
+      // Extract the bucket id from pathFileSplitsMap. This is the more accurate
+      // method; however, it may not work in certain cases where the bucket files
+      // are named after the files used while loading the data. In such cases,
+      // fall back to the older, potentially inaccurate method.
+      String bucketStr = entry.getKey().substring(0, entry.getKey().length() - 2);
+      int bucketId = -1;
+      try {
+        bucketId = Integer.parseInt(bucketStr);
+      } catch (NumberFormatException e) {
+        fallback = true;
+        LOG.info("Fallback to using older sort based logic to assign " +
+            "buckets to splits.");
+        bucketIds.clear();
+        break;
+      }
+      bucketIds.add(bucketId);
       for (FileSplit fsplit : entry.getValue()) {
        bucketToInitialSplitMap.put(bucketId, fsplit);
      }
-      bucketNum++;
+    }
+
+    int bucketNum = 0;
+    if (fallback) {
+      for (Map.Entry> entry : pathFileSplitsMap.entrySet()) {
+        for (FileSplit fsplit : entry.getValue()) {
+          bucketToInitialSplitMap.put(bucketNum, fsplit);
+        }
+        bucketNum++;
+      }
     }
 
     // this is just for SMB join use-case. The numBuckets would be equal to that of the big table
@@ -553,16 +580,28 @@ private FileSplit getFileSplitFromEvent(InputDataInformationEvent event) throws
     // data from the right buckets to the big table side. For e.g. Big table has 8 buckets and small
     // table has 4 buckets, bucket 0 of small table needs to be sent to bucket 4 of the big table as
     // well.
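The comment above describes how a small table's buckets are fanned out to the big table's buckets when the big table has more buckets. Below is a minimal, self-contained sketch of that arithmetic; it is not taken from the patch, the bucket counts are made-up example values, and it assumes the big table's bucket count is an exact multiple of the small table's, as SMB join requires.

public class BucketFanoutSketch {
  public static void main(String[] args) {
    int bigTableBuckets = 8;    // assumed example value
    int smallTableBuckets = 4;  // assumed example value
    // Each small-table bucket b already holds its own splits; replicate them to
    // every big-table bucket whose id is congruent to b modulo the small table's
    // bucket count (the same arithmetic as bucketIdBase + bucketId in the patch).
    for (int b = 0; b < smallTableBuckets; b++) {
      for (int i = 1; i < bigTableBuckets / smallTableBuckets; i++) {
        int target = i * smallTableBuckets + b;
        System.out.println("small bucket " + b + " also feeds big bucket " + target);
      }
    }
  }
}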
- if (bucketNum < numBuckets) { - int loopedBucketId = 0; - for (; bucketNum < numBuckets; bucketNum++) { - for (InputSplit fsplit : bucketToInitialSplitMap.get(loopedBucketId)) { - bucketToInitialSplitMap.put(bucketNum, fsplit); + if (numInputsAffectingRootInputSpecUpdate != 1 && + inputName.compareTo(mainWorkName) != 0) { + // small table + int inputNumBuckets = inputToBucketMap.get(inputName); + if (fallback && bucketNum != inputNumBuckets) { + // The fallback mechanism kicked in which only works correctly if + // there exists a file for each bucket, else it may result in wrong + // result. Throw an error + + } + if (inputNumBuckets < numBuckets) { + // Need to send the splits to multiple buckets + for (int i = 1; i < numBuckets/inputNumBuckets; i++) { + int bucketIdBase = i * inputNumBuckets; + for (Integer bucketId : bucketIds) { + for (InputSplit fsplit : bucketToInitialSplitMap.get(bucketId)) { + bucketToInitialSplitMap.put(bucketIdBase + bucketId, fsplit); + } + } } - loopedBucketId++; } } - return bucketToInitialSplitMap; } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/CustomVertexConfiguration.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/CustomVertexConfiguration.java index ef5e7edcd6..4301829517 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/CustomVertexConfiguration.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/CustomVertexConfiguration.java @@ -21,7 +21,10 @@ import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import com.google.common.base.Preconditions; import org.apache.hadoop.hive.ql.plan.TezWork.VertexType; import org.apache.hadoop.io.Writable; @@ -39,22 +42,24 @@ private VertexType vertexType = VertexType.AUTO_INITIALIZED_EDGES; private int numInputs; private String inputName; + private Map inputToBucketMap; public CustomVertexConfiguration() { } // this is the constructor to use for the Bucket map join case. public CustomVertexConfiguration(int numBuckets, VertexType vertexType) { - this(numBuckets, vertexType, "", 1); + this(numBuckets, vertexType, "", 1, null); } // this is the constructor to use for SMB. public CustomVertexConfiguration(int numBuckets, VertexType vertexType, String inputName, - int numInputs) { + int numInputs, Map inputToBucketMap) { this.numBuckets = numBuckets; this.vertexType = vertexType; this.numInputs = numInputs; this.inputName = inputName; + this.inputToBucketMap = inputToBucketMap; } @Override @@ -63,6 +68,14 @@ public void write(DataOutput out) throws IOException { out.writeInt(this.numBuckets); out.writeInt(numInputs); out.writeUTF(inputName); + int sz = inputToBucketMap != null ? 
inputToBucketMap.size() : 0; + out.writeInt(sz); + if (sz > 0) { + for (Map.Entry entry : inputToBucketMap.entrySet()) { + out.writeUTF(entry.getKey()); + out.writeInt(entry.getValue()); + } + } } @Override @@ -71,6 +84,16 @@ public void readFields(DataInput in) throws IOException { this.numBuckets = in.readInt(); this.numInputs = in.readInt(); this.inputName = in.readUTF(); + int sz = in.readInt(); + Preconditions.checkState(sz >= 0); + if (sz == 0) { + this.inputToBucketMap = null; + } else { + this.inputToBucketMap = new HashMap<>(); + for (int i = 0; i < sz; i++) { + this.inputToBucketMap.put(in.readUTF(), in.readInt()); + } + } } public int getNumBuckets() { @@ -88,4 +111,8 @@ public String getInputName() { public int getNumInputs() { return numInputs; } + + public Map getInputToBucketMap() { + return inputToBucketMap; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java index 9885038588..0e75f6e5e8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java @@ -21,6 +21,7 @@ import java.util.concurrent.ConcurrentHashMap; import com.google.common.base.Function; +import com.google.common.base.Preconditions; import com.google.common.collect.Iterators; import com.google.common.collect.Lists; import javax.security.auth.login.LoginException; @@ -568,13 +569,26 @@ private Vertex createVertex(JobConf conf, MergeJoinWork mergeJoinWork, FileSyste MultiMRInput.createConfigBuilder(conf, HiveInputFormat.class).build()); } + // To be populated for SMB joins only for all the small tables + Map inputToBucketMap = new HashMap<>(); + if (mergeJoinWork.getMergeJoinOperator().getParentOperators().size() == 1 + && mergeJoinWork.getMergeJoinOperator().getOpTraits() != null) { + // This is an SMB join. + for (BaseWork work : mapWorkList) { + MapWork mw = (MapWork) work; + Map> aliasToWork = mw.getAliasToWork(); + Preconditions.checkState(aliasToWork.size() == 1, + "More than 1 alias in SMB mapwork"); + inputToBucketMap.put(mw.getName(), mw.getWorks().get(0).getOpTraits().getNumBuckets()); + } + } VertexManagerPluginDescriptor desc = VertexManagerPluginDescriptor.create(CustomPartitionVertex.class.getName()); // the +1 to the size is because of the main work. 
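The new write()/readFields() pair above serializes the input-to-bucket map as a size followed by UTF/int entries, with a size of 0 standing in for a null map. The following is a minimal standalone round-trip sketch of that convention; it is not from the patch, it uses plain java.io streams instead of Tez's DataOutputBuffer, and the table names are made up.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

public class InputToBucketMapRoundTrip {
  // Write the map as: size, then (key, value) pairs; 0 means "no map".
  static void write(DataOutputStream out, Map<String, Integer> map) throws IOException {
    int sz = map != null ? map.size() : 0;
    out.writeInt(sz);
    if (sz > 0) {
      for (Map.Entry<String, Integer> e : map.entrySet()) {
        out.writeUTF(e.getKey());
        out.writeInt(e.getValue());
      }
    }
  }

  // Read the same layout back; a size of 0 is decoded as a null map.
  static Map<String, Integer> read(DataInputStream in) throws IOException {
    int sz = in.readInt();
    if (sz == 0) {
      return null;
    }
    Map<String, Integer> map = new HashMap<>();
    for (int i = 0; i < sz; i++) {
      map.put(in.readUTF(), in.readInt());
    }
    return map;
  }

  public static void main(String[] args) throws IOException {
    Map<String, Integer> inputToBucketMap = new HashMap<>();
    inputToBucketMap.put("smallTable1", 2);  // hypothetical input name and bucket count
    inputToBucketMap.put("smallTable2", 4);  // hypothetical input name and bucket count

    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    write(new DataOutputStream(bytes), inputToBucketMap);
    Map<String, Integer> copy =
        read(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
    System.out.println(copy);
  }
}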
CustomVertexConfiguration vertexConf = new CustomVertexConfiguration(mergeJoinWork.getMergeJoinOperator().getConf() .getNumBuckets(), vertexType, mergeJoinWork.getBigTableAlias(), - mapWorkList.size() + 1); + mapWorkList.size() + 1, inputToBucketMap); DataOutputBuffer dob = new DataOutputBuffer(); vertexConf.write(dob); byte[] userPayload = dob.getData(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java index 9b0ffe0e91..25d1681f9b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java @@ -187,6 +187,8 @@ public void setTTable(org.apache.hadoop.hive.metastore.api.Table tTable) { t.setOwner(SessionState.getUserFromAuthenticator()); // set create time t.setCreateTime((int) (System.currentTimeMillis() / 1000)); + t.setBucketingVersion(2); + t.setExpertMode(false); } return t; } @@ -675,6 +677,10 @@ public int getNumBuckets() { return tTable.getSd().getNumBuckets(); } + public int getBucketingVersion() { + return tTable.getBucketingVersion(); + } + public void setInputFormatClass(String name) throws HiveException { if (name == null) { inputFormatClass = null; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java index dc698c8de8..324b737cf9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java @@ -180,7 +180,8 @@ MapJoinOperator mapJoinOp = convertJoinMapJoin(joinOp, context, mapJoinConversionPos, true); // map join operator by default has no bucket cols and num of reduce sinks // reduced by 1 - mapJoinOp.setOpTraits(new OpTraits(null, -1, null, joinOp.getOpTraits().getNumReduceSinks())); + mapJoinOp.setOpTraits(new OpTraits(null, -1, null, + joinOp.getOpTraits().getNumReduceSinks(), joinOp.getOpTraits().getBucketingVersion())); mapJoinOp.setStatistics(joinOp.getStatistics()); // propagate this change till the next RS for (Operator childOp : mapJoinOp.getChildOperators()) { @@ -378,7 +379,7 @@ private void convertJoinSMBJoin(JoinOperator joinOp, OptimizeTezProcContext cont joinOp.getSchema()); int numReduceSinks = joinOp.getOpTraits().getNumReduceSinks(); OpTraits opTraits = new OpTraits(joinOp.getOpTraits().getBucketColNames(), numBuckets, - joinOp.getOpTraits().getSortCols(), numReduceSinks); + joinOp.getOpTraits().getSortCols(), numReduceSinks, joinOp.getOpTraits().getBucketingVersion()); mergeJoinOp.setOpTraits(opTraits); mergeJoinOp.setStatistics(joinOp.getStatistics()); @@ -445,7 +446,8 @@ private void setAllChildrenTraits(Operator currentOp, Op return; } currentOp.setOpTraits(new OpTraits(opTraits.getBucketColNames(), - opTraits.getNumBuckets(), opTraits.getSortCols(), opTraits.getNumReduceSinks())); + opTraits.getNumBuckets(), opTraits.getSortCols(), opTraits.getNumReduceSinks(), + opTraits.getBucketingVersion())); for (Operator childOp : currentOp.getChildOperators()) { if ((childOp instanceof ReduceSinkOperator) || (childOp instanceof GroupByOperator)) { break; @@ -498,7 +500,8 @@ private boolean convertJoinBucketMapJoin(JoinOperator joinOp, OptimizeTezProcCon // we can set the traits for this join operator opTraits = new OpTraits(joinOp.getOpTraits().getBucketColNames(), - tezBucketJoinProcCtx.getNumBuckets(), null, joinOp.getOpTraits().getNumReduceSinks()); + tezBucketJoinProcCtx.getNumBuckets(), null, + 
+        joinOp.getOpTraits().getNumReduceSinks(), joinOp.getOpTraits().getBucketingVersion());
     mapJoinOp.setOpTraits(opTraits);
     mapJoinOp.setStatistics(joinOp.getStatistics());
     setNumberOfBucketsOnChildren(mapJoinOp);
@@ -576,6 +579,13 @@ private boolean checkConvertJoinSMBJoin(JoinOperator joinOp, OptimizeTezProcCont
         return false;
       }
       ReduceSinkOperator rsOp = (ReduceSinkOperator) parentOp;
+      // If the chosen big table has fewer buckets than any of the small tables,
+      // then those small-table buckets have no mapping to any of the big-table
+      // buckets, which would produce wrong results.
+      if (numBuckets > 0 && numBuckets < rsOp.getOpTraits().getNumBuckets()) {
+        LOG.info("Small table has more buckets than big table.");
+        return false;
+      }
       if (!checkColEquality(rsOp.getParentOperators().get(0).getOpTraits().getSortCols(), rsOp
           .getOpTraits().getSortCols(), rsOp.getColumnExprMap(), false)) {
         LOG.info("We cannot convert to SMB because the sort column names do not match.");
@@ -593,6 +603,36 @@ private boolean checkConvertJoinSMBJoin(JoinOperator joinOp, OptimizeTezProcCont
       numBuckets = bigTableRS.getConf().getNumReducers();
     }
     tezBucketJoinProcCtx.setNumBuckets(numBuckets);
+
+    // Bucketing uses two different versions: version 1 for existing tables and
+    // version 2 for new tables. All inputs to the SMB join must use the same
+    // version. This only applies to tables read directly, not to intermediate
+    // outputs of joins/group-bys.
+    int version = -1;
+    for (Operator parentOp : joinOp.getParentOperators()) {
+      // Check whether the parent comes from a table scan and, if so, which
+      // bucketing version it uses.
+      assert parentOp.getParentOperators() != null && parentOp.getParentOperators().size() == 1;
+      Operator op = parentOp.getParentOperators().get(0);
+      while (op != null && !(op instanceof TableScanOperator
+          || op instanceof ReduceSinkOperator
+          || op instanceof CommonJoinOperator)) {
+        // If op has parents, it is guaranteed to have exactly one.
+        op = op.getParentOperators().size() > 0 ?
+            op.getParentOperators().get(0) : null;
+      }
+
+      if (op instanceof TableScanOperator) {
+        int localVersion = ((TableScanOperator)op).getConf().
+            getTableMetadata().getBucketingVersion();
+        if (version == -1) {
+          version = localVersion;
+        } else if (version != localVersion) {
+          // Versions don't match; return false.
+ LOG.info("SMB Join can't be performed due to bucketing version mismatch"); + return false; + } + } + } LOG.info("We can convert the join to an SMB join."); return true; } @@ -1168,7 +1208,8 @@ private boolean convertJoinDynamicPartitionedHashJoin(JoinOperator joinOp, Optim joinOp.getOpTraits().getBucketColNames(), numReducers, null, - joinOp.getOpTraits().getNumReduceSinks()); + joinOp.getOpTraits().getNumReduceSinks(), + joinOp.getOpTraits().getBucketingVersion()); mapJoinOp.setOpTraits(opTraits); mapJoinOp.setStatistics(joinOp.getStatistics()); // propagate this change till the next RS diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/OpTraitsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/OpTraitsRulesProcFactory.java index 69d9f3125a..ac2f75a7bc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/OpTraitsRulesProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/OpTraitsRulesProcFactory.java @@ -92,10 +92,12 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, List> listBucketCols = new ArrayList>(); int numBuckets = -1; int numReduceSinks = 1; + int bucketingVersion = -1; OpTraits parentOpTraits = rs.getParentOperators().get(0).getOpTraits(); if (parentOpTraits != null) { numBuckets = parentOpTraits.getNumBuckets(); numReduceSinks += parentOpTraits.getNumReduceSinks(); + bucketingVersion = parentOpTraits.getBucketingVersion(); } List bucketCols = new ArrayList<>(); @@ -134,7 +136,8 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, } listBucketCols.add(bucketCols); - OpTraits opTraits = new OpTraits(listBucketCols, numBuckets, listBucketCols, numReduceSinks); + OpTraits opTraits = new OpTraits(listBucketCols, numBuckets, + listBucketCols, numReduceSinks, bucketingVersion); rs.setOpTraits(opTraits); return null; } @@ -213,7 +216,8 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, sortedColsList.add(sortCols); } // num reduce sinks hardcoded to 0 because TS has no parents - OpTraits opTraits = new OpTraits(bucketColsList, numBuckets, sortedColsList, 0); + OpTraits opTraits = new OpTraits(bucketColsList, numBuckets, + sortedColsList, 0, table.getBucketingVersion()); ts.setOpTraits(opTraits); return null; } @@ -239,12 +243,15 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, List> listBucketCols = new ArrayList>(); int numReduceSinks = 0; + int bucketingVersion = -1; OpTraits parentOpTraits = gbyOp.getParentOperators().get(0).getOpTraits(); if (parentOpTraits != null) { numReduceSinks = parentOpTraits.getNumReduceSinks(); + bucketingVersion = parentOpTraits.getBucketingVersion(); } listBucketCols.add(gbyKeys); - OpTraits opTraits = new OpTraits(listBucketCols, -1, listBucketCols, numReduceSinks); + OpTraits opTraits = new OpTraits(listBucketCols, -1, listBucketCols, + numReduceSinks, bucketingVersion); gbyOp.setOpTraits(opTraits); return null; } @@ -298,12 +305,15 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, int numBuckets = -1; int numReduceSinks = 0; + int bucketingVersion = -1; OpTraits parentOpTraits = selOp.getParentOperators().get(0).getOpTraits(); if (parentOpTraits != null) { numBuckets = parentOpTraits.getNumBuckets(); numReduceSinks = parentOpTraits.getNumReduceSinks(); + bucketingVersion = parentOpTraits.getBucketingVersion(); } - OpTraits opTraits = new OpTraits(listBucketCols, numBuckets, listSortCols, numReduceSinks); + 
OpTraits opTraits = new OpTraits(listBucketCols, numBuckets, listSortCols, + numReduceSinks, bucketingVersion); selOp.setOpTraits(opTraits); return null; } @@ -319,6 +329,8 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, List> sortColsList = new ArrayList>(); byte pos = 0; int numReduceSinks = 0; // will be set to the larger of the parents + int bucketingVersion = -1; + boolean bucketingVersionSeen = false; for (Operator parentOp : joinOp.getParentOperators()) { if (!(parentOp instanceof ReduceSinkOperator)) { // can be mux operator @@ -335,10 +347,18 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, if (parentOpTraits.getNumReduceSinks() > numReduceSinks) { numReduceSinks = parentOpTraits.getNumReduceSinks(); } + // If there is mismatch in bucketingVersion, then it should be set to + // -1, that way SMB will be disabled. + if (bucketingVersion == -1 && !bucketingVersionSeen) { + bucketingVersion = parentOpTraits.getBucketingVersion(); + bucketingVersionSeen = true; + } else if (bucketingVersion != parentOpTraits.getBucketingVersion()) { + bucketingVersion = -1; + } pos++; } - joinOp.setOpTraits(new OpTraits(bucketColsList, -1, bucketColsList, numReduceSinks)); + joinOp.setOpTraits(new OpTraits(bucketColsList, -1, bucketColsList, numReduceSinks, bucketingVersion)); return null; } @@ -392,6 +412,8 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Operator operator = (Operator) nd; int numReduceSinks = 0; + int bucketingVersion = -1; + boolean bucketingVersionSeen = false; for (Operator parentOp : operator.getParentOperators()) { if (parentOp.getOpTraits() == null) { continue; @@ -399,8 +421,17 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, if (parentOp.getOpTraits().getNumReduceSinks() > numReduceSinks) { numReduceSinks = parentOp.getOpTraits().getNumReduceSinks(); } + // If there is mismatch in bucketingVersion, then it should be set to + // -1, that way SMB will be disabled. 
+ if (bucketingVersion == -1 && !bucketingVersionSeen) { + bucketingVersion = parentOp.getOpTraits().getBucketingVersion(); + bucketingVersionSeen = true; + } else if (bucketingVersion != parentOp.getOpTraits().getBucketingVersion()) { + bucketingVersion = -1; + } } - OpTraits opTraits = new OpTraits(null, -1, null, numReduceSinks); + OpTraits opTraits = new OpTraits(null, -1, + null, numReduceSinks, bucketingVersion); operator.setOpTraits(opTraits); return null; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java index bacc44482a..39d2370435 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java @@ -121,7 +121,8 @@ } // we can set the traits for this join operator - OpTraits opTraits = new OpTraits(bucketColNames, numBuckets, null, joinOp.getOpTraits().getNumReduceSinks()); + OpTraits opTraits = new OpTraits(bucketColNames, numBuckets, null, + joinOp.getOpTraits().getNumReduceSinks(), joinOp.getOpTraits().getBucketingVersion()); mapJoinOp.setOpTraits(opTraits); mapJoinOp.setStatistics(joinOp.getStatistics()); setNumberOfBucketsOnChildren(mapJoinOp); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java index 54f5bab6de..05ff084079 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java @@ -160,6 +160,50 @@ private URI initializeFromURI(String fromPath, boolean isLocal) throws IOExcepti "source contains directory: " + oneSrc.getPath().toString())); } } + // Do another loop if table is bucketed + List bucketCols = table.getBucketCols(); + if (bucketCols != null && !bucketCols.isEmpty()) { + // Hive assumes that user names the files as per the corresponding + // bucket. For e.g, file names should follow the format 000000_0, 000000_1 etc. + // Here the 1st file will belong to bucket 0 and 2nd to bucket 1 and so on. + boolean[] bucketArray = new boolean[table.getNumBuckets()]; + // initialize the array + int numBuckets = table.getNumBuckets(); + for (int i = 0; i < numBuckets; i++) { + bucketArray[i] = false; + } + + for (FileStatus oneSrc : srcs) { + String bucketName = oneSrc.getPath().getName(); + + //get the bucket id + String bucketIdStr = + Utilities.getBucketFileNameFromPathSubString(bucketName); + int bucketId = Utilities.getBucketIdFromFile(bucketIdStr); + LOG.info("bucket ID for file " + oneSrc.getPath() + " = " + bucketId + + " for table " + table.getFullyQualifiedName()); + if (bucketId == -1) { + throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg( + "The file name is invalid : " + + oneSrc.getPath().toString() + " for table " + + table.getFullyQualifiedName())); + } + if (bucketId >= numBuckets) { + throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg( + "The file name corresponds to invalid bucketId : " + + oneSrc.getPath().toString()) + + ". Maximum number of buckets can be " + numBuckets + + " for table " + table.getFullyQualifiedName()); + } + if (bucketArray[bucketId]) { + throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg( + "Multiple files for same bucket : " + bucketId + + ". Only 1 file per bucket allowed in single load command. 
To load multiple files for same bucket, use multiple statements for table " + + table.getFullyQualifiedName())); + } + bucketArray[bucketId] = true; + } + } } catch (IOException e) { // Has to use full name to make sure it does not conflict with // org.apache.commons.lang.StringUtils diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/OpTraits.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/OpTraits.java index 9621c3be53..6cf6c31ba8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/OpTraits.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/OpTraits.java @@ -22,17 +22,20 @@ public class OpTraits { - List> bucketColNames; - List> sortColNames; - int numBuckets; - int numReduceSinks; + private List> bucketColNames; + private List> sortColNames; + private int numBuckets; + private int numReduceSinks; + private int bucketingVersion; public OpTraits(List> bucketColNames, int numBuckets, - List> sortColNames, int numReduceSinks) { + List> sortColNames, int numReduceSinks, + int bucketingVersion) { this.bucketColNames = bucketColNames; this.numBuckets = numBuckets; this.sortColNames = sortColNames; this.numReduceSinks = numReduceSinks; + this.bucketingVersion = bucketingVersion; } public List> getBucketColNames() { @@ -68,6 +71,13 @@ public int getNumReduceSinks() { return this.numReduceSinks; } + public void setBucketingVersion(int bucketingVersion) { + this.bucketingVersion = bucketingVersion; + } + + public int getBucketingVersion() { + return bucketingVersion; + } @Override public String toString() { diff --git a/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHive.java b/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHive.java index aa95d2fcdc..6dd0e1dcdb 100755 --- a/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHive.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHive.java @@ -173,6 +173,8 @@ public void testTable() throws Throwable { tbl.setRewriteEnabled(false); tbl.setCreationMetadata(new HashMap()); + tbl.getTTable().setBucketingVersion(2); + tbl.getTTable().setExpertMode(false); // create table setNullCreateTableGrants(); diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_2.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_2.q index e5fdcb57e4..b7bd10eed2 100644 --- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_2.q +++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_2.q @@ -1,19 +1,21 @@ set hive.strict.checks.bucketing=false; set hive.mapred.mode=nonstrict; --- small 1 part, 4 bucket & big 2 part, 2 bucket -CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; +-- small 1 part, 2 bucket & big 2 part, 4 bucket +CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; load data local inpath '../../data/files/auto_sortmerge_join/small/000000_0' INTO TABLE bucket_small partition(ds='2008-04-08'); load data local inpath '../../data/files/auto_sortmerge_join/small/000001_0' INTO TABLE bucket_small partition(ds='2008-04-08'); -load data local inpath '../../data/files/auto_sortmerge_join/small/000002_0' INTO TABLE bucket_small partition(ds='2008-04-08'); -load data local inpath '../../data/files/auto_sortmerge_join/small/000003_0' INTO TABLE bucket_small partition(ds='2008-04-08'); -CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS 
TEXTFILE; +CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big partition(ds='2008-04-08'); load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_big partition(ds='2008-04-08'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08'); load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big partition(ds='2008-04-09'); load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_big partition(ds='2008-04-09'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-09'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-09'); set hive.auto.convert.join=true; set hive.auto.convert.sortmerge.join=true; diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_4.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_4.q index abf09e5534..9f719aebb5 100644 --- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_4.q +++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_4.q @@ -1,7 +1,7 @@ set hive.strict.checks.bucketing=false; set hive.mapred.mode=nonstrict; --- small 2 part, 4 bucket & big 1 part, 2 bucket +-- small 2 part, 4 bucket & big 1 part, 4 bucket CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; load data local inpath '../../data/files/auto_sortmerge_join/small/000000_0' INTO TABLE bucket_small partition(ds='2008-04-08'); load data local inpath '../../data/files/auto_sortmerge_join/small/000001_0' INTO TABLE bucket_small partition(ds='2008-04-08'); @@ -13,9 +13,11 @@ load data local inpath '../../data/files/auto_sortmerge_join/small/000001_0' INT load data local inpath '../../data/files/auto_sortmerge_join/small/000002_0' INTO TABLE bucket_small partition(ds='2008-04-09'); load data local inpath '../../data/files/auto_sortmerge_join/small/000003_0' INTO TABLE bucket_small partition(ds='2008-04-09'); -CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big partition(ds='2008-04-08'); load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_big partition(ds='2008-04-08'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08'); set hive.auto.convert.join=true; set hive.auto.convert.sortmerge.join=true; diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_5.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_5.q index b85c4a7aa3..c107501d0a 100644 --- 
a/ql/src/test/queries/clientpositive/auto_sortmerge_join_5.q +++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_5.q @@ -1,19 +1,19 @@ set hive.strict.checks.bucketing=false; set hive.mapred.mode=nonstrict; --- small no part, 4 bucket & big no part, 2 bucket +-- small no part, 2 bucket & big no part, 4 bucket -- SORT_QUERY_RESULTS -CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; +CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; load data local inpath '../../data/files/auto_sortmerge_join/small/000000_0' INTO TABLE bucket_small; load data local inpath '../../data/files/auto_sortmerge_join/small/000001_0' INTO TABLE bucket_small; -load data local inpath '../../data/files/auto_sortmerge_join/small/000002_0' INTO TABLE bucket_small; -load data local inpath '../../data/files/auto_sortmerge_join/small/000003_0' INTO TABLE bucket_small; -CREATE TABLE bucket_big (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +CREATE TABLE bucket_big (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big; load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_big; +load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big; +load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big; set hive.auto.convert.sortmerge.join=true; set hive.optimize.bucketmapjoin = true; diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_7.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_7.q index bd780861e3..a5cc04a97f 100644 --- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_7.q +++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_7.q @@ -1,7 +1,7 @@ set hive.strict.checks.bucketing=false; set hive.mapred.mode=nonstrict; --- small 2 part, 4 bucket & big 2 part, 2 bucket +-- small 2 part, 4 bucket & big 2 part, 4 bucket CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; load data local inpath '../../data/files/auto_sortmerge_join/small/000000_0' INTO TABLE bucket_small partition(ds='2008-04-08'); load data local inpath '../../data/files/auto_sortmerge_join/small/000001_0' INTO TABLE bucket_small partition(ds='2008-04-08'); @@ -13,12 +13,16 @@ load data local inpath '../../data/files/auto_sortmerge_join/small/000001_0' INT load data local inpath '../../data/files/auto_sortmerge_join/small/000002_0' INTO TABLE bucket_small partition(ds='2008-04-09'); load data local inpath '../../data/files/auto_sortmerge_join/small/000003_0' INTO TABLE bucket_small partition(ds='2008-04-09'); -CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big partition(ds='2008-04-08'); load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_big partition(ds='2008-04-08'); +load data local inpath 
'../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08'); load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big partition(ds='2008-04-09'); load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_big partition(ds='2008-04-09'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-09'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-09'); set hive.auto.convert.join=true; set hive.auto.convert.sortmerge.join=true; diff --git a/ql/src/test/results/clientnegative/bucket_mapjoin_mismatch1.q.out b/ql/src/test/results/clientnegative/bucket_mapjoin_mismatch1.q.out index b9c2e6f827..37dbbf9ff3 100644 --- a/ql/src/test/results/clientnegative/bucket_mapjoin_mismatch1.q.out +++ b/ql/src/test/results/clientnegative/bucket_mapjoin_mismatch1.q.out @@ -53,174 +53,4 @@ POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@srcbucket_mapjoin_part_2 -PREHOOK: query: load data local inpath '../../data/files/bmj/000002_0' - INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@srcbucket_mapjoin_part_2 -POSTHOOK: query: load data local inpath '../../data/files/bmj/000002_0' - INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@srcbucket_mapjoin_part_2 -POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08 -PREHOOK: query: load data local inpath '../../data/files/bmj/000003_0' - INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08 -POSTHOOK: query: load data local inpath '../../data/files/bmj/000003_0' - INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') -POSTHOOK: type: LOAD #### A masked pattern was here #### -POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08 -PREHOOK: query: explain -select a.key, a.value, b.value -from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b -on a.key=b.key and a.ds="2008-04-08" and b.ds="2008-04-08" -PREHOOK: type: QUERY -POSTHOOK: query: explain -select a.key, a.value, b.value -from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b -on a.key=b.key and a.ds="2008-04-08" and b.ds="2008-04-08" -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 108 Data size: 42000 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 108 Data size: 42000 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 108 Data size: 42000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 
(type: int) - Statistics: Num rows: 108 Data size: 42000 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - TableScan - alias: b - Statistics: Num rows: 78 Data size: 30620 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 78 Data size: 30620 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 78 Data size: 30620 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 78 Data size: 30620 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col4 - Statistics: Num rows: 118 Data size: 46200 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col4 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 118 Data size: 46200 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 118 Data size: 46200 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain -select /*+mapjoin(b)*/ a.key, a.value, b.value -from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b -on a.key=b.key and a.ds="2008-04-08" and b.ds="2008-04-08" -PREHOOK: type: QUERY -POSTHOOK: query: explain -select /*+mapjoin(b)*/ a.key, a.value, b.value -from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b -on a.key=b.key and a.ds="2008-04-08" and b.ds="2008-04-08" -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-3 - Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 102 Data size: 30620 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 102 Data size: 30620 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 140 Data size: 42000 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 140 Data size: 42000 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col7 - Statistics: Num rows: 154 Data size: 46200 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 154 Data size: 46200 Basic stats: COMPLETE Column stats: NONE - 
File Output Operator - compressed: false - Statistics: Num rows: 154 Data size: 46200 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -FAILED: SemanticException [Error 10136]: Bucketed mapjoin cannot be performed. This can be due to multiple reasons: . Join columns dont match bucketed columns. . Number of buckets are not a multiple of each other. If you really want to perform the operation, either remove the mapjoin hint from your query or set hive.enforce.bucketmapjoin to false. diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_2.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_2.q.out index 5cfc35aa73..dda72115c5 100644 --- a/ql/src/test/results/clientpositive/auto_sortmerge_join_2.q.out +++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_2.q.out @@ -1,8 +1,8 @@ -PREHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_small -POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_small @@ -23,27 +23,11 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/sm POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_small@ds=2008-04-08 -PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000002_0' INTO TABLE bucket_small partition(ds='2008-04-08') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@bucket_small@ds=2008-04-08 -POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000002_0' INTO TABLE bucket_small partition(ds='2008-04-08') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@bucket_small@ds=2008-04-08 -PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000003_0' INTO TABLE bucket_small partition(ds='2008-04-08') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@bucket_small@ds=2008-04-08 -POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000003_0' INTO TABLE bucket_small partition(ds='2008-04-08') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@bucket_small@ds=2008-04-08 -PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS 
STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_big -POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_big @@ -64,6 +48,22 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big partition(ds='2008-04-09') PREHOOK: type: LOAD #### A masked pattern was here #### @@ -81,6 +81,22 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-09 PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key @@ -95,16 +111,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: 
NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -134,7 +150,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -142,7 +158,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -150,7 +166,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -158,7 +174,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -183,7 +199,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -191,7 +207,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -199,7 +215,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -207,7 +223,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -308,7 +324,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -316,7 +332,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_small - numFiles 4 + numFiles 2 numRows 0 partition_columns ds partition_columns.types string @@ -324,7 +340,7 @@ STAGE PLANS: serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -332,7 +348,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -353,16 +369,16 @@ STAGE PLANS: 
$hdt$_1:b TableScan alias: b - Statistics: Num rows: 4 Data size: 2260 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 1140 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 4 Data size: 2260 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 1140 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 2260 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 1140 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 _col0 (type: string) @@ -374,16 +390,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -414,7 +430,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -422,7 +438,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -430,7 +446,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -438,7 +454,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -463,7 +479,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -471,7 +487,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -479,7 +495,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -487,7 +503,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -511,7 +527,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 4 + 
bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -519,7 +535,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_small - numFiles 4 + numFiles 2 numRows 0 partition_columns ds partition_columns.types string @@ -527,7 +543,7 @@ STAGE PLANS: serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -535,7 +551,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -597,7 +613,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -605,7 +621,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -613,7 +629,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -621,7 +637,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -645,7 +661,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -653,7 +669,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -661,7 +677,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -669,7 +685,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -690,16 +706,16 @@ STAGE PLANS: $hdt$_0:a TableScan alias: a - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 _col0 (type: string) @@ 
-711,16 +727,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 4 Data size: 2260 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 1140 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 4 Data size: 2260 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 1140 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 2260 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 1140 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -751,7 +767,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -759,7 +775,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -767,7 +783,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -775,7 +791,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -800,7 +816,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -808,7 +824,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -816,7 +832,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -824,7 +840,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -848,7 +864,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -856,7 +872,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_small - numFiles 4 + numFiles 2 numRows 0 partition_columns ds partition_columns.types string @@ -864,7 +880,7 @@ STAGE PLANS: serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -872,7 +888,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + 
bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -924,16 +940,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -963,7 +979,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -971,7 +987,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -979,7 +995,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -987,7 +1003,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -1012,7 +1028,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -1020,7 +1036,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -1028,7 +1044,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -1036,7 +1052,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_4.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_4.q.out index 0d586fd26b..b54c574358 100644 --- a/ql/src/test/results/clientpositive/auto_sortmerge_join_4.q.out +++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_4.q.out @@ -72,11 +72,11 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/sm POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_small@ds=2008-04-09 -PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE bucket_big (key 
string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_big -POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_big @@ -97,6 +97,22 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key @@ -111,16 +127,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -150,7 +166,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -158,7 +174,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -166,7 +182,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was 
here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -174,7 +190,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -243,7 +259,7 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -38 +78 PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key @@ -258,16 +274,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -297,7 +313,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -305,7 +321,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -313,7 +329,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -321,7 +337,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -390,7 +406,7 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -38 +78 PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key @@ -534,16 +550,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - 
Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -574,7 +590,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -582,7 +598,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -590,7 +606,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -598,7 +614,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -755,7 +771,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -763,7 +779,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -771,7 +787,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -779,7 +795,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -800,16 +816,16 @@ STAGE PLANS: $hdt$_0:a TableScan alias: a - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 _col0 (type: string) @@ -861,7 +877,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -869,7 +885,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -877,7 +893,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### 
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -885,7 +901,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -1034,16 +1050,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -1073,7 +1089,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -1081,7 +1097,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -1089,7 +1105,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -1097,7 +1113,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -1166,4 +1182,4 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -38 +78 diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_5.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_5.q.out index 45704d1253..451c3b3353 100644 --- a/ql/src/test/results/clientpositive/auto_sortmerge_join_5.q.out +++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_5.q.out @@ -1,8 +1,8 @@ -PREHOOK: query: CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_small -POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_small @@ -22,27 +22,11 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/sm POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: 
Output: default@bucket_small -PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000002_0' INTO TABLE bucket_small -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@bucket_small -POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000002_0' INTO TABLE bucket_small -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@bucket_small -PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000003_0' INTO TABLE bucket_small -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@bucket_small -POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000003_0' INTO TABLE bucket_small -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@bucket_small -PREHOOK: query: CREATE TABLE bucket_big (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE bucket_big (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_big -POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_big @@ -62,6 +46,22 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key @@ -76,16 +76,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE 
Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -114,7 +114,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -122,13 +122,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -136,7 +136,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -144,13 +144,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big @@ -216,16 +216,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -254,7 +254,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -262,13 +262,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -276,7 +276,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -284,13 +284,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl 
struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big @@ -369,16 +369,16 @@ STAGE PLANS: $hdt$_1:b TableScan alias: b - Statistics: Num rows: 1 Data size: 2260 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1140 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 2260 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1140 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 2260 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1140 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 _col0 (type: string) @@ -390,16 +390,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -429,7 +429,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -437,13 +437,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -451,7 +451,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -459,13 +459,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big @@ -475,7 +475,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 4 + bucket_count 2 bucket_field_name key 
column.name.delimiter , columns key,value @@ -492,7 +492,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -500,13 +500,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_small - numFiles 4 + numFiles 2 numRows 0 rawDataSize 0 serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_small @@ -551,16 +551,16 @@ STAGE PLANS: $hdt$_0:a TableScan alias: a - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 _col0 (type: string) @@ -572,16 +572,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 1 Data size: 2260 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1140 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 2260 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1140 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 2260 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1140 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -611,7 +611,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -619,13 +619,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -633,7 +633,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -641,13 +641,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} 
serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big @@ -657,7 +657,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -674,7 +674,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -682,13 +682,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_small - numFiles 4 + numFiles 2 numRows 0 rawDataSize 0 serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_small @@ -728,16 +728,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -766,7 +766,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -774,13 +774,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -788,7 +788,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -796,13 +796,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big diff --git 
a/ql/src/test/results/clientpositive/auto_sortmerge_join_7.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_7.q.out index 1959075912..f335142360 100644 --- a/ql/src/test/results/clientpositive/auto_sortmerge_join_7.q.out +++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_7.q.out @@ -72,11 +72,11 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/sm POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_small@ds=2008-04-09 -PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_big -POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_big @@ -97,6 +97,22 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big partition(ds='2008-04-09') PREHOOK: type: LOAD #### A masked pattern was here #### @@ -114,6 +130,22 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-09 +PREHOOK: query: load data local inpath 
'../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-09 PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key @@ -128,16 +160,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -167,7 +199,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -175,7 +207,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -183,7 +215,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -191,7 +223,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -216,7 +248,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -224,7 +256,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -232,7 +264,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -240,7 +272,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -312,7 +344,7 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: 
default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -76 +156 PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key @@ -327,16 +359,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -366,7 +398,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -374,7 +406,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -382,7 +414,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -390,7 +422,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -415,7 +447,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -423,7 +455,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -431,7 +463,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -439,7 +471,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -511,7 +543,7 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -76 +156 PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key @@ -655,16 +687,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - 
Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -695,7 +727,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -703,7 +735,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -711,7 +743,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -719,7 +751,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -744,7 +776,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -752,7 +784,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -760,7 +792,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -768,7 +800,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -926,7 +958,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -934,7 +966,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -942,7 +974,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -950,7 +982,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , 
columns key,value @@ -974,7 +1006,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -982,7 +1014,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -990,7 +1022,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -998,7 +1030,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -1019,16 +1051,16 @@ STAGE PLANS: $hdt$_0:a TableScan alias: a - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 _col0 (type: string) @@ -1080,7 +1112,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -1088,7 +1120,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -1096,7 +1128,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -1104,7 +1136,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -1129,7 +1161,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -1137,7 +1169,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -1145,7 +1177,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -1153,7 +1185,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat 
properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -1302,16 +1334,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -1341,7 +1373,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -1349,7 +1381,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -1357,7 +1389,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -1365,7 +1397,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -1390,7 +1422,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -1398,7 +1430,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -1406,7 +1438,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -1414,7 +1446,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -1486,4 +1518,4 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -76 +156 diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_2.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_2.q.out index 054b0d00be..d4472cf2a0 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_2.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_2.q.out @@ -1,8 +1,8 @@ -PREHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) 
SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_small -POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_small @@ -23,27 +23,11 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/sm POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_small@ds=2008-04-08 -PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000002_0' INTO TABLE bucket_small partition(ds='2008-04-08') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@bucket_small@ds=2008-04-08 -POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000002_0' INTO TABLE bucket_small partition(ds='2008-04-08') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@bucket_small@ds=2008-04-08 -PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000003_0' INTO TABLE bucket_small partition(ds='2008-04-08') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@bucket_small@ds=2008-04-08 -POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000003_0' INTO TABLE bucket_small partition(ds='2008-04-08') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@bucket_small@ds=2008-04-08 -PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_big -POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_big @@ -64,6 +48,22 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: 
default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big partition(ds='2008-04-09') PREHOOK: type: LOAD #### A masked pattern was here #### @@ -81,6 +81,22 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-09 PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key @@ -101,16 +117,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 4 Data size: 2996 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 1508 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 4 Data size: 2996 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 1508 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 2996 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 1508 Basic stats: COMPLETE Column stats: NONE Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -121,7 +137,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -129,7 +145,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_small - numFiles 4 + numFiles 2 numRows 0 partition_columns ds partition_columns.types string @@ -137,7 +153,7 @@ STAGE PLANS: serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -145,7 +161,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -167,16 +183,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 112 Data size: 74872 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 158376 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 107 Data size: 71529 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 228 Data size: 150457 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 107 Data size: 71529 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 228 Data size: 150457 Basic stats: COMPLETE Column stats: NONE Merge Join Operator condition map: Inner Join 0 to 1 @@ -184,7 +200,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 0 - Statistics: Num rows: 117 Data size: 78681 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 165502 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -208,7 +224,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -216,7 +232,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -224,7 +240,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -232,7 +248,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -256,7 +272,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -264,7 +280,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -272,7 +288,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -280,7 +296,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -375,16 +391,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 4 
Data size: 2996 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 1508 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 4 Data size: 2996 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 1508 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 2996 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 1508 Basic stats: COMPLETE Column stats: NONE Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -395,7 +411,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -403,7 +419,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_small - numFiles 4 + numFiles 2 numRows 0 partition_columns ds partition_columns.types string @@ -411,7 +427,7 @@ STAGE PLANS: serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -419,7 +435,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -441,16 +457,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 112 Data size: 74872 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 158376 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 107 Data size: 71529 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 228 Data size: 150457 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 107 Data size: 71529 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 228 Data size: 150457 Basic stats: COMPLETE Column stats: NONE Merge Join Operator condition map: Inner Join 0 to 1 @@ -458,7 +474,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 0 - Statistics: Num rows: 117 Data size: 78681 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 165502 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -482,7 +498,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -490,7 +506,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -498,7 +514,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -506,7 +522,7 @@ STAGE PLANS: output 
format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -530,7 +546,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -538,7 +554,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -546,7 +562,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -554,7 +570,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_4.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_4.q.out index 95d329862c..5cd5d798bc 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_4.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_4.q.out @@ -72,11 +72,11 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/sm POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_small@ds=2008-04-09 -PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_big -POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_big @@ -97,6 +97,22 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath 
'../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key @@ -232,16 +248,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 56 Data size: 37620 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 79280 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 54 Data size: 36276 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 114 Data size: 75316 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 54 Data size: 36276 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 114 Data size: 75316 Basic stats: COMPLETE Column stats: NONE Merge Join Operator condition map: Inner Join 0 to 1 @@ -249,7 +265,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 1 - Statistics: Num rows: 59 Data size: 39903 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 125 Data size: 82847 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -273,7 +289,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -281,7 +297,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -289,7 +305,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -297,7 +313,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -370,7 +386,7 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -38 +78 PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key @@ -506,16 +522,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 56 Data size: 37620 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 79280 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 54 Data size: 36276 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 114 Data size: 75316 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - 
Statistics: Num rows: 54 Data size: 36276 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 114 Data size: 75316 Basic stats: COMPLETE Column stats: NONE Merge Join Operator condition map: Inner Join 0 to 1 @@ -523,7 +539,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 0 - Statistics: Num rows: 59 Data size: 39903 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 125 Data size: 82847 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -547,7 +563,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -555,7 +571,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -563,7 +579,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -571,7 +587,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -644,7 +660,7 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -38 +78 PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key @@ -780,16 +796,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 56 Data size: 37620 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 79280 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 54 Data size: 36276 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 114 Data size: 75316 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 54 Data size: 36276 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 114 Data size: 75316 Basic stats: COMPLETE Column stats: NONE Merge Join Operator condition map: Inner Join 0 to 1 @@ -797,7 +813,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 0 - Statistics: Num rows: 59 Data size: 39903 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 125 Data size: 82847 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -821,7 +837,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -829,7 +845,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -837,7 +853,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} 
serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -845,7 +861,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -918,4 +934,4 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -38 +78 diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_5.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_5.q.out index e711715aa5..a18f4b21fc 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_5.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_5.q.out @@ -1,8 +1,8 @@ -PREHOOK: query: CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_small -POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_small @@ -22,27 +22,11 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/sm POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_small -PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000002_0' INTO TABLE bucket_small -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@bucket_small -POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000002_0' INTO TABLE bucket_small -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@bucket_small -PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000003_0' INTO TABLE bucket_small -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@bucket_small -POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000003_0' INTO TABLE bucket_small -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@bucket_small -PREHOOK: query: CREATE TABLE bucket_big (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE bucket_big (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_big -POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default 
POSTHOOK: Output: default@bucket_big @@ -62,6 +46,22 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key @@ -101,7 +101,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -109,13 +109,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_small - numFiles 4 + numFiles 2 numRows 0 rawDataSize 0 serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -123,7 +123,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -131,13 +131,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_small - numFiles 4 + numFiles 2 numRows 0 rawDataSize 0 serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_small @@ -187,7 +187,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -195,13 +195,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -209,7 +209,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: 
SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -217,13 +217,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big @@ -318,7 +318,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -326,13 +326,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_small - numFiles 4 + numFiles 2 numRows 0 rawDataSize 0 serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -340,7 +340,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -348,13 +348,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_small - numFiles 4 + numFiles 2 numRows 0 rawDataSize 0 serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_small @@ -404,7 +404,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -412,13 +412,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -426,7 +426,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -434,13 +434,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big @@ -562,7 +562,7 @@ STAGE PLANS: output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -570,13 +570,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -584,7 +584,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -592,13 +592,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big @@ -639,7 +639,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -647,13 +647,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_small - numFiles 4 + numFiles 2 numRows 0 rawDataSize 0 serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -661,7 +661,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -669,13 +669,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_small - numFiles 4 + numFiles 2 numRows 0 rawDataSize 0 serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_small diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_7.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_7.q.out index 53c685cb11..fdea211fa4 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_7.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_7.q.out @@ -72,11 +72,11 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/sm POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_small@ds=2008-04-09 -PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE 
bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_big -POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_big @@ -97,6 +97,22 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big partition(ds='2008-04-09') PREHOOK: type: LOAD #### A masked pattern was here #### @@ -114,6 +130,22 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-09 PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key @@ -249,16 +281,16 @@ STAGE PLANS: 
Map Operator Tree: TableScan alias: b - Statistics: Num rows: 112 Data size: 74872 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 158376 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 107 Data size: 71529 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 228 Data size: 150457 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 107 Data size: 71529 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 228 Data size: 150457 Basic stats: COMPLETE Column stats: NONE Merge Join Operator condition map: Inner Join 0 to 1 @@ -266,7 +298,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 1 - Statistics: Num rows: 117 Data size: 78681 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 165502 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -290,7 +322,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -298,7 +330,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -306,7 +338,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -314,7 +346,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -338,7 +370,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -346,7 +378,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -354,7 +386,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -362,7 +394,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -438,7 +470,7 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -76 +156 PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key @@ -574,16 +606,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 112 Data size: 74872 
Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 158376 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 107 Data size: 71529 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 228 Data size: 150457 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 107 Data size: 71529 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 228 Data size: 150457 Basic stats: COMPLETE Column stats: NONE Merge Join Operator condition map: Inner Join 0 to 1 @@ -591,7 +623,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 0 - Statistics: Num rows: 117 Data size: 78681 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 165502 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -615,7 +647,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -623,7 +655,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -631,7 +663,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -639,7 +671,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -663,7 +695,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -671,7 +703,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -679,7 +711,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -687,7 +719,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -763,7 +795,7 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -76 +156 PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key @@ -899,16 +931,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 112 Data size: 74872 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 
158376 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 107 Data size: 71529 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 228 Data size: 150457 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 107 Data size: 71529 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 228 Data size: 150457 Basic stats: COMPLETE Column stats: NONE Merge Join Operator condition map: Inner Join 0 to 1 @@ -916,7 +948,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 0 - Statistics: Num rows: 117 Data size: 78681 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 165502 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -940,7 +972,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -948,7 +980,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -956,7 +988,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -964,7 +996,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -988,7 +1020,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -996,7 +1028,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -1004,7 +1036,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -1012,7 +1044,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -1088,4 +1120,4 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -76 +156 diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_2.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_2.q.out index 8cfa113794..117ff4aecc 100644 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_2.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_2.q.out @@ -1,8 +1,8 @@ -PREHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS 
STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_small -POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_small @@ -23,27 +23,11 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/sm POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_small@ds=2008-04-08 -PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000002_0' INTO TABLE bucket_small partition(ds='2008-04-08') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@bucket_small@ds=2008-04-08 -POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000002_0' INTO TABLE bucket_small partition(ds='2008-04-08') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@bucket_small@ds=2008-04-08 -PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000003_0' INTO TABLE bucket_small partition(ds='2008-04-08') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@bucket_small@ds=2008-04-08 -POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000003_0' INTO TABLE bucket_small partition(ds='2008-04-08') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@bucket_small@ds=2008-04-08 -PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_big -POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_big @@ -64,6 +48,22 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data 
local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big partition(ds='2008-04-09') PREHOOK: type: LOAD #### A masked pattern was here #### @@ -81,6 +81,22 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-09 PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key @@ -100,16 +116,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -117,7 +133,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 0 - Statistics: Num rows: 123 Data size: 60500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 264 Data size: 127864 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Group By Operator aggregations: count() @@ -142,7 +158,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -150,7 +166,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was 
here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -158,7 +174,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -166,7 +182,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -191,7 +207,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -199,7 +215,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -207,7 +223,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -215,7 +231,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -308,16 +324,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -325,7 +341,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 0 - Statistics: Num rows: 123 Data size: 60500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 264 Data size: 127864 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Group By Operator aggregations: count() @@ -350,7 +366,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -358,7 +374,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -366,7 +382,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -374,7 +390,7 @@ STAGE PLANS: output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -399,7 +415,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -407,7 +423,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -415,7 +431,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -423,7 +439,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_4.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_4.q.out index fce5e0cfc4..aff5a0d242 100644 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_4.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_4.q.out @@ -72,11 +72,11 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/sm POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_small@ds=2008-04-09 -PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_big -POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_big @@ -97,6 +97,22 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath 
'../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key @@ -116,16 +132,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -133,7 +149,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 1 - Statistics: Num rows: 61 Data size: 30250 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 132 Data size: 63932 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Group By Operator aggregations: count() @@ -158,7 +174,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -166,7 +182,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -174,7 +190,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -182,7 +198,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -254,7 +270,7 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -38 +78 PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key @@ -274,16 +290,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) 
outputColumnNames: _col0 - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -291,7 +307,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 0 - Statistics: Num rows: 61 Data size: 30250 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 132 Data size: 63932 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Group By Operator aggregations: count() @@ -316,7 +332,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -324,7 +340,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -332,7 +348,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -340,7 +356,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -412,7 +428,7 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -38 +78 PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key @@ -432,16 +448,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -449,7 +465,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 0 - Statistics: Num rows: 61 Data size: 30250 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 132 Data size: 63932 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Group By Operator aggregations: count() @@ -474,7 +490,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -482,7 +498,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -490,7 +506,7 @@ 
STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -498,7 +514,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -570,4 +586,4 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -38 +78 diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_5.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_5.q.out index 8250eca099..6255dd2819 100644 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_5.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_5.q.out @@ -1,8 +1,8 @@ -PREHOOK: query: CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_small -POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_small @@ -22,27 +22,11 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/sm POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_small -PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000002_0' INTO TABLE bucket_small -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@bucket_small -POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000002_0' INTO TABLE bucket_small -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@bucket_small -PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000003_0' INTO TABLE bucket_small -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@bucket_small -POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000003_0' INTO TABLE bucket_small -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@bucket_small -PREHOOK: query: CREATE TABLE bucket_big (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE bucket_big (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_big -POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS 
STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_big @@ -62,6 +46,22 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key @@ -81,16 +81,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -98,7 +98,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 1 - Statistics: Num rows: 1 Data size: 2486 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1254 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Group By Operator aggregations: count() @@ -122,7 +122,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -130,13 +130,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -144,7 +144,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -152,13 +152,13 @@ STAGE PLANS: columns.types string:string #### A masked 
pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big @@ -232,16 +232,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -249,7 +249,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 0 - Statistics: Num rows: 1 Data size: 30250 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 63932 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Group By Operator aggregations: count() @@ -273,7 +273,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -281,13 +281,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -295,7 +295,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -303,13 +303,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big @@ -382,16 +382,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 1 Data size: 2260 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1140 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 2260 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1140 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: 
_col0 - Statistics: Num rows: 1 Data size: 2260 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1140 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: string) @@ -414,7 +414,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -422,13 +422,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_small - numFiles 4 + numFiles 2 numRows 0 rawDataSize 0 serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -436,7 +436,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -444,13 +444,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_small - numFiles 4 + numFiles 2 numRows 0 rawDataSize 0 serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_small @@ -468,16 +468,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -487,7 +487,7 @@ STAGE PLANS: input vertices: 1 Map 3 Position of Big Table: 0 - Statistics: Num rows: 1 Data size: 30250 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 63932 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Group By Operator aggregations: count() @@ -518,7 +518,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -526,13 +526,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -540,7 +540,7 @@ STAGE PLANS: output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -548,13 +548,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_7.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_7.q.out index eb813c1734..ac5cd47fbb 100644 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_7.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_7.q.out @@ -72,11 +72,11 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/sm POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_small@ds=2008-04-09 -PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_big -POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_big @@ -97,6 +97,22 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big partition(ds='2008-04-09') PREHOOK: type: LOAD #### A masked pattern was here #### @@ -114,6 +130,22 @@ POSTHOOK: query: load data local inpath 
'../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-09 PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key @@ -133,16 +165,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -150,7 +182,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 1 - Statistics: Num rows: 123 Data size: 60500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 264 Data size: 127864 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Group By Operator aggregations: count() @@ -175,7 +207,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -183,7 +215,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -191,7 +223,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -199,7 +231,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -224,7 +256,7 @@ STAGE PLANS: partition values: 
ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -232,7 +264,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -240,7 +272,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -248,7 +280,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -323,7 +355,7 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -76 +156 PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key @@ -343,16 +375,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -360,7 +392,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 0 - Statistics: Num rows: 123 Data size: 60500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 264 Data size: 127864 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Group By Operator aggregations: count() @@ -385,7 +417,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -393,7 +425,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -401,7 +433,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -409,7 +441,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -434,7 +466,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + 
bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -442,7 +474,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -450,7 +482,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -458,7 +490,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -533,7 +565,7 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -76 +156 PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key @@ -553,16 +585,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -570,7 +602,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 0 - Statistics: Num rows: 123 Data size: 60500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 264 Data size: 127864 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Group By Operator aggregations: count() @@ -595,7 +627,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -603,7 +635,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -611,7 +643,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -619,7 +651,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -644,7 +676,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key 
column.name.delimiter , columns key,value @@ -652,7 +684,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -660,7 +692,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -668,7 +700,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -743,4 +775,4 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -76 +156 diff --git a/standalone-metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp b/standalone-metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp index 27f8c0f2fc..882a793e7d 100644 --- a/standalone-metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp +++ b/standalone-metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp @@ -4945,6 +4945,16 @@ void Table::__set_creationMetadata(const std::map & v __isset.creationMetadata = true; } +void Table::__set_bucketingVersion(const int32_t val) { + this->bucketingVersion = val; +__isset.bucketingVersion = true; +} + +void Table::__set_expertMode(const bool val) { + this->expertMode = val; +__isset.expertMode = true; +} + uint32_t Table::read(::apache::thrift::protocol::TProtocol* iprot) { apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); @@ -5136,6 +5146,22 @@ uint32_t Table::read(::apache::thrift::protocol::TProtocol* iprot) { xfer += iprot->skip(ftype); } break; + case 17: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->bucketingVersion); + this->__isset.bucketingVersion = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 18: + if (ftype == ::apache::thrift::protocol::T_BOOL) { + xfer += iprot->readBool(this->expertMode); + this->__isset.expertMode = true; + } else { + xfer += iprot->skip(ftype); + } + break; default: xfer += iprot->skip(ftype); break; @@ -5247,6 +5273,16 @@ uint32_t Table::write(::apache::thrift::protocol::TProtocol* oprot) const { } xfer += oprot->writeFieldEnd(); } + if (this->__isset.bucketingVersion) { + xfer += oprot->writeFieldBegin("bucketingVersion", ::apache::thrift::protocol::T_I32, 17); + xfer += oprot->writeI32(this->bucketingVersion); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.expertMode) { + xfer += oprot->writeFieldBegin("expertMode", ::apache::thrift::protocol::T_BOOL, 18); + xfer += oprot->writeBool(this->expertMode); + xfer += oprot->writeFieldEnd(); + } xfer += oprot->writeFieldStop(); xfer += oprot->writeStructEnd(); return xfer; @@ -5270,6 +5306,8 @@ void swap(Table &a, Table &b) { swap(a.temporary, b.temporary); swap(a.rewriteEnabled, b.rewriteEnabled); swap(a.creationMetadata, b.creationMetadata); + swap(a.bucketingVersion, b.bucketingVersion); + swap(a.expertMode, b.expertMode); swap(a.__isset, b.__isset); } @@ -5290,6 +5328,8 @@ Table::Table(const Table& other233) { temporary = other233.temporary; rewriteEnabled = other233.rewriteEnabled; creationMetadata = other233.creationMetadata; + bucketingVersion = 
other233.bucketingVersion; + expertMode = other233.expertMode; __isset = other233.__isset; } Table& Table::operator=(const Table& other234) { @@ -5309,6 +5349,8 @@ Table& Table::operator=(const Table& other234) { temporary = other234.temporary; rewriteEnabled = other234.rewriteEnabled; creationMetadata = other234.creationMetadata; + bucketingVersion = other234.bucketingVersion; + expertMode = other234.expertMode; __isset = other234.__isset; return *this; } @@ -5331,6 +5373,8 @@ void Table::printTo(std::ostream& out) const { out << ", " << "temporary="; (__isset.temporary ? (out << to_string(temporary)) : (out << "")); out << ", " << "rewriteEnabled="; (__isset.rewriteEnabled ? (out << to_string(rewriteEnabled)) : (out << "")); out << ", " << "creationMetadata="; (__isset.creationMetadata ? (out << to_string(creationMetadata)) : (out << "")); + out << ", " << "bucketingVersion="; (__isset.bucketingVersion ? (out << to_string(bucketingVersion)) : (out << "")); + out << ", " << "expertMode="; (__isset.expertMode ? (out << to_string(expertMode)) : (out << "")); out << ")"; } diff --git a/standalone-metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h b/standalone-metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h index df646a7d17..15fd5ac05f 100644 --- a/standalone-metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h +++ b/standalone-metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h @@ -2372,7 +2372,7 @@ inline std::ostream& operator<<(std::ostream& out, const StorageDescriptor& obj) } typedef struct _Table__isset { - _Table__isset() : tableName(false), dbName(false), owner(false), createTime(false), lastAccessTime(false), retention(false), sd(false), partitionKeys(false), parameters(false), viewOriginalText(false), viewExpandedText(false), tableType(false), privileges(false), temporary(true), rewriteEnabled(false), creationMetadata(false) {} + _Table__isset() : tableName(false), dbName(false), owner(false), createTime(false), lastAccessTime(false), retention(false), sd(false), partitionKeys(false), parameters(false), viewOriginalText(false), viewExpandedText(false), tableType(false), privileges(false), temporary(true), rewriteEnabled(false), creationMetadata(false), bucketingVersion(true), expertMode(true) {} bool tableName :1; bool dbName :1; bool owner :1; @@ -2389,6 +2389,8 @@ typedef struct _Table__isset { bool temporary :1; bool rewriteEnabled :1; bool creationMetadata :1; + bool bucketingVersion :1; + bool expertMode :1; } _Table__isset; class Table { @@ -2396,7 +2398,7 @@ class Table { Table(const Table&); Table& operator=(const Table&); - Table() : tableName(), dbName(), owner(), createTime(0), lastAccessTime(0), retention(0), viewOriginalText(), viewExpandedText(), tableType(), temporary(false), rewriteEnabled(0) { + Table() : tableName(), dbName(), owner(), createTime(0), lastAccessTime(0), retention(0), viewOriginalText(), viewExpandedText(), tableType(), temporary(false), rewriteEnabled(0), bucketingVersion(1), expertMode(false) { } virtual ~Table() throw(); @@ -2416,6 +2418,8 @@ class Table { bool temporary; bool rewriteEnabled; std::map creationMetadata; + int32_t bucketingVersion; + bool expertMode; _Table__isset __isset; @@ -2451,6 +2455,10 @@ class Table { void __set_creationMetadata(const std::map & val); + void __set_bucketingVersion(const int32_t val); + + void __set_expertMode(const bool val); + bool operator == (const Table & rhs) const { if (!(tableName == rhs.tableName)) @@ -2493,6 +2501,14 @@ class Table { return false; else if 
(__isset.creationMetadata && !(creationMetadata == rhs.creationMetadata)) return false; + if (__isset.bucketingVersion != rhs.__isset.bucketingVersion) + return false; + else if (__isset.bucketingVersion && !(bucketingVersion == rhs.bucketingVersion)) + return false; + if (__isset.expertMode != rhs.__isset.expertMode) + return false; + else if (__isset.expertMode && !(expertMode == rhs.expertMode)) + return false; return true; } bool operator != (const Table &rhs) const { diff --git a/standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/Table.java b/standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/Table.java index f317b0393f..7375c1eb75 100644 --- a/standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/Table.java +++ b/standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/Table.java @@ -54,6 +54,8 @@ private static final org.apache.thrift.protocol.TField TEMPORARY_FIELD_DESC = new org.apache.thrift.protocol.TField("temporary", org.apache.thrift.protocol.TType.BOOL, (short)14); private static final org.apache.thrift.protocol.TField REWRITE_ENABLED_FIELD_DESC = new org.apache.thrift.protocol.TField("rewriteEnabled", org.apache.thrift.protocol.TType.BOOL, (short)15); private static final org.apache.thrift.protocol.TField CREATION_METADATA_FIELD_DESC = new org.apache.thrift.protocol.TField("creationMetadata", org.apache.thrift.protocol.TType.MAP, (short)16); + private static final org.apache.thrift.protocol.TField BUCKETING_VERSION_FIELD_DESC = new org.apache.thrift.protocol.TField("bucketingVersion", org.apache.thrift.protocol.TType.I32, (short)17); + private static final org.apache.thrift.protocol.TField EXPERT_MODE_FIELD_DESC = new org.apache.thrift.protocol.TField("expertMode", org.apache.thrift.protocol.TType.BOOL, (short)18); private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); static { @@ -77,6 +79,8 @@ private boolean temporary; // optional private boolean rewriteEnabled; // optional private Map creationMetadata; // optional + private int bucketingVersion; // optional + private boolean expertMode; // optional /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ public enum _Fields implements org.apache.thrift.TFieldIdEnum { @@ -95,7 +99,9 @@ PRIVILEGES((short)13, "privileges"), TEMPORARY((short)14, "temporary"), REWRITE_ENABLED((short)15, "rewriteEnabled"), - CREATION_METADATA((short)16, "creationMetadata"); + CREATION_METADATA((short)16, "creationMetadata"), + BUCKETING_VERSION((short)17, "bucketingVersion"), + EXPERT_MODE((short)18, "expertMode"); private static final Map byName = new HashMap(); @@ -142,6 +148,10 @@ public static _Fields findByThriftId(int fieldId) { return REWRITE_ENABLED; case 16: // CREATION_METADATA return CREATION_METADATA; + case 17: // BUCKETING_VERSION + return BUCKETING_VERSION; + case 18: // EXPERT_MODE + return EXPERT_MODE; default: return null; } @@ -187,8 +197,10 @@ public String getFieldName() { private static final int __RETENTION_ISSET_ID = 2; private static final int __TEMPORARY_ISSET_ID = 3; private static final int __REWRITEENABLED_ISSET_ID = 4; + private static final int __BUCKETINGVERSION_ISSET_ID = 5; + private static final int __EXPERTMODE_ISSET_ID = 6; private byte __isset_bitfield = 0; - private static final _Fields optionals[] = {_Fields.PRIVILEGES,_Fields.TEMPORARY,_Fields.REWRITE_ENABLED,_Fields.CREATION_METADATA}; + private static final _Fields optionals[] = {_Fields.PRIVILEGES,_Fields.TEMPORARY,_Fields.REWRITE_ENABLED,_Fields.CREATION_METADATA,_Fields.BUCKETING_VERSION,_Fields.EXPERT_MODE}; public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; static { Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); @@ -229,6 +241,10 @@ public String getFieldName() { new org.apache.thrift.meta_data.MapMetaData(org.apache.thrift.protocol.TType.MAP, new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING), new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRUCT , "BasicTxnInfo")))); + tmpMap.put(_Fields.BUCKETING_VERSION, new org.apache.thrift.meta_data.FieldMetaData("bucketingVersion", org.apache.thrift.TFieldRequirementType.OPTIONAL, + new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32))); + tmpMap.put(_Fields.EXPERT_MODE, new org.apache.thrift.meta_data.FieldMetaData("expertMode", org.apache.thrift.TFieldRequirementType.OPTIONAL, + new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.BOOL))); metaDataMap = Collections.unmodifiableMap(tmpMap); org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(Table.class, metaDataMap); } @@ -236,6 +252,10 @@ public String getFieldName() { public Table() { this.temporary = false; + this.bucketingVersion = 1; + + this.expertMode = false; + } public Table( @@ -330,6 +350,8 @@ public Table(Table other) { } this.creationMetadata = __this__creationMetadata; } + this.bucketingVersion = other.bucketingVersion; + this.expertMode = other.expertMode; } public Table deepCopy() { @@ -359,6 +381,10 @@ public void clear() { setRewriteEnabledIsSet(false); this.rewriteEnabled = false; this.creationMetadata = null; + this.bucketingVersion = 1; + + this.expertMode = false; + } public String getTableName() { @@ -761,6 +787,50 @@ public void setCreationMetadataIsSet(boolean value) { } } + public int getBucketingVersion() { + return this.bucketingVersion; + } + + public void setBucketingVersion(int bucketingVersion) { + this.bucketingVersion = bucketingVersion; + setBucketingVersionIsSet(true); + } + + public void 
unsetBucketingVersion() { + __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __BUCKETINGVERSION_ISSET_ID); + } + + /** Returns true if field bucketingVersion is set (has been assigned a value) and false otherwise */ + public boolean isSetBucketingVersion() { + return EncodingUtils.testBit(__isset_bitfield, __BUCKETINGVERSION_ISSET_ID); + } + + public void setBucketingVersionIsSet(boolean value) { + __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __BUCKETINGVERSION_ISSET_ID, value); + } + + public boolean isExpertMode() { + return this.expertMode; + } + + public void setExpertMode(boolean expertMode) { + this.expertMode = expertMode; + setExpertModeIsSet(true); + } + + public void unsetExpertMode() { + __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __EXPERTMODE_ISSET_ID); + } + + /** Returns true if field expertMode is set (has been assigned a value) and false otherwise */ + public boolean isSetExpertMode() { + return EncodingUtils.testBit(__isset_bitfield, __EXPERTMODE_ISSET_ID); + } + + public void setExpertModeIsSet(boolean value) { + __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __EXPERTMODE_ISSET_ID, value); + } + public void setFieldValue(_Fields field, Object value) { switch (field) { case TABLE_NAME: @@ -891,6 +961,22 @@ public void setFieldValue(_Fields field, Object value) { } break; + case BUCKETING_VERSION: + if (value == null) { + unsetBucketingVersion(); + } else { + setBucketingVersion((Integer)value); + } + break; + + case EXPERT_MODE: + if (value == null) { + unsetExpertMode(); + } else { + setExpertMode((Boolean)value); + } + break; + } } @@ -944,6 +1030,12 @@ public Object getFieldValue(_Fields field) { case CREATION_METADATA: return getCreationMetadata(); + case BUCKETING_VERSION: + return getBucketingVersion(); + + case EXPERT_MODE: + return isExpertMode(); + } throw new IllegalStateException(); } @@ -987,6 +1079,10 @@ public boolean isSet(_Fields field) { return isSetRewriteEnabled(); case CREATION_METADATA: return isSetCreationMetadata(); + case BUCKETING_VERSION: + return isSetBucketingVersion(); + case EXPERT_MODE: + return isSetExpertMode(); } throw new IllegalStateException(); } @@ -1148,6 +1244,24 @@ public boolean equals(Table that) { return false; } + boolean this_present_bucketingVersion = true && this.isSetBucketingVersion(); + boolean that_present_bucketingVersion = true && that.isSetBucketingVersion(); + if (this_present_bucketingVersion || that_present_bucketingVersion) { + if (!(this_present_bucketingVersion && that_present_bucketingVersion)) + return false; + if (this.bucketingVersion != that.bucketingVersion) + return false; + } + + boolean this_present_expertMode = true && this.isSetExpertMode(); + boolean that_present_expertMode = true && that.isSetExpertMode(); + if (this_present_expertMode || that_present_expertMode) { + if (!(this_present_expertMode && that_present_expertMode)) + return false; + if (this.expertMode != that.expertMode) + return false; + } + return true; } @@ -1235,6 +1349,16 @@ public int hashCode() { if (present_creationMetadata) list.add(creationMetadata); + boolean present_bucketingVersion = true && (isSetBucketingVersion()); + list.add(present_bucketingVersion); + if (present_bucketingVersion) + list.add(bucketingVersion); + + boolean present_expertMode = true && (isSetExpertMode()); + list.add(present_expertMode); + if (present_expertMode) + list.add(expertMode); + return list.hashCode(); } @@ -1406,6 +1530,26 @@ public int compareTo(Table other) { return lastComparison; } } + 
lastComparison = Boolean.valueOf(isSetBucketingVersion()).compareTo(other.isSetBucketingVersion()); + if (lastComparison != 0) { + return lastComparison; + } + if (isSetBucketingVersion()) { + lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.bucketingVersion, other.bucketingVersion); + if (lastComparison != 0) { + return lastComparison; + } + } + lastComparison = Boolean.valueOf(isSetExpertMode()).compareTo(other.isSetExpertMode()); + if (lastComparison != 0) { + return lastComparison; + } + if (isSetExpertMode()) { + lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.expertMode, other.expertMode); + if (lastComparison != 0) { + return lastComparison; + } + } return 0; } @@ -1541,6 +1685,18 @@ public String toString() { } first = false; } + if (isSetBucketingVersion()) { + if (!first) sb.append(", "); + sb.append("bucketingVersion:"); + sb.append(this.bucketingVersion); + first = false; + } + if (isSetExpertMode()) { + if (!first) sb.append(", "); + sb.append("expertMode:"); + sb.append(this.expertMode); + first = false; + } sb.append(")"); return sb.toString(); } @@ -1758,6 +1914,22 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, Table struct) throw org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); } break; + case 17: // BUCKETING_VERSION + if (schemeField.type == org.apache.thrift.protocol.TType.I32) { + struct.bucketingVersion = iprot.readI32(); + struct.setBucketingVersionIsSet(true); + } else { + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); + } + break; + case 18: // EXPERT_MODE + if (schemeField.type == org.apache.thrift.protocol.TType.BOOL) { + struct.expertMode = iprot.readBool(); + struct.setExpertModeIsSet(true); + } else { + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); + } + break; default: org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); } @@ -1872,6 +2044,16 @@ public void write(org.apache.thrift.protocol.TProtocol oprot, Table struct) thro oprot.writeFieldEnd(); } } + if (struct.isSetBucketingVersion()) { + oprot.writeFieldBegin(BUCKETING_VERSION_FIELD_DESC); + oprot.writeI32(struct.bucketingVersion); + oprot.writeFieldEnd(); + } + if (struct.isSetExpertMode()) { + oprot.writeFieldBegin(EXPERT_MODE_FIELD_DESC); + oprot.writeBool(struct.expertMode); + oprot.writeFieldEnd(); + } oprot.writeFieldStop(); oprot.writeStructEnd(); } @@ -1938,7 +2120,13 @@ public void write(org.apache.thrift.protocol.TProtocol prot, Table struct) throw if (struct.isSetCreationMetadata()) { optionals.set(15); } - oprot.writeBitSet(optionals, 16); + if (struct.isSetBucketingVersion()) { + optionals.set(16); + } + if (struct.isSetExpertMode()) { + optionals.set(17); + } + oprot.writeBitSet(optionals, 18); if (struct.isSetTableName()) { oprot.writeString(struct.tableName); } @@ -2007,12 +2195,18 @@ public void write(org.apache.thrift.protocol.TProtocol prot, Table struct) throw } } } + if (struct.isSetBucketingVersion()) { + oprot.writeI32(struct.bucketingVersion); + } + if (struct.isSetExpertMode()) { + oprot.writeBool(struct.expertMode); + } } @Override public void read(org.apache.thrift.protocol.TProtocol prot, Table struct) throws org.apache.thrift.TException { TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(16); + BitSet incoming = iprot.readBitSet(18); if (incoming.get(0)) { struct.tableName = iprot.readString(); struct.setTableNameIsSet(true); @@ -2112,6 +2306,14 @@ public void read(org.apache.thrift.protocol.TProtocol prot, 
Table struct) throws } struct.setCreationMetadataIsSet(true); } + if (incoming.get(16)) { + struct.bucketingVersion = iprot.readI32(); + struct.setBucketingVersionIsSet(true); + } + if (incoming.get(17)) { + struct.expertMode = iprot.readBool(); + struct.setExpertModeIsSet(true); + } } } diff --git a/standalone-metastore/src/gen/thrift/gen-php/metastore/Types.php b/standalone-metastore/src/gen/thrift/gen-php/metastore/Types.php index 6878ee1be7..052f30ea5c 100644 --- a/standalone-metastore/src/gen/thrift/gen-php/metastore/Types.php +++ b/standalone-metastore/src/gen/thrift/gen-php/metastore/Types.php @@ -5042,6 +5042,14 @@ class Table { * @var array */ public $creationMetadata = null; + /** + * @var int + */ + public $bucketingVersion = 1; + /** + * @var bool + */ + public $expertMode = false; public function __construct($vals=null) { if (!isset(self::$_TSPEC)) { @@ -5134,6 +5142,14 @@ class Table { 'class' => '\metastore\BasicTxnInfo', ), ), + 17 => array( + 'var' => 'bucketingVersion', + 'type' => TType::I32, + ), + 18 => array( + 'var' => 'expertMode', + 'type' => TType::BOOL, + ), ); } if (is_array($vals)) { @@ -5185,6 +5201,12 @@ class Table { if (isset($vals['creationMetadata'])) { $this->creationMetadata = $vals['creationMetadata']; } + if (isset($vals['bucketingVersion'])) { + $this->bucketingVersion = $vals['bucketingVersion']; + } + if (isset($vals['expertMode'])) { + $this->expertMode = $vals['expertMode']; + } } } @@ -5359,6 +5381,20 @@ class Table { $xfer += $input->skip($ftype); } break; + case 17: + if ($ftype == TType::I32) { + $xfer += $input->readI32($this->bucketingVersion); + } else { + $xfer += $input->skip($ftype); + } + break; + case 18: + if ($ftype == TType::BOOL) { + $xfer += $input->readBool($this->expertMode); + } else { + $xfer += $input->skip($ftype); + } + break; default: $xfer += $input->skip($ftype); break; @@ -5496,6 +5532,16 @@ class Table { } $xfer += $output->writeFieldEnd(); } + if ($this->bucketingVersion !== null) { + $xfer += $output->writeFieldBegin('bucketingVersion', TType::I32, 17); + $xfer += $output->writeI32($this->bucketingVersion); + $xfer += $output->writeFieldEnd(); + } + if ($this->expertMode !== null) { + $xfer += $output->writeFieldBegin('expertMode', TType::BOOL, 18); + $xfer += $output->writeBool($this->expertMode); + $xfer += $output->writeFieldEnd(); + } $xfer += $output->writeFieldStop(); $xfer += $output->writeStructEnd(); return $xfer; diff --git a/standalone-metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py b/standalone-metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py index 25e9a889b2..6dfffaa281 100644 --- a/standalone-metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py +++ b/standalone-metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py @@ -3468,6 +3468,8 @@ class Table: - temporary - rewriteEnabled - creationMetadata + - bucketingVersion + - expertMode """ thrift_spec = ( @@ -3488,9 +3490,11 @@ class Table: (14, TType.BOOL, 'temporary', None, False, ), # 14 (15, TType.BOOL, 'rewriteEnabled', None, None, ), # 15 (16, TType.MAP, 'creationMetadata', (TType.STRING,None,TType.STRUCT,(BasicTxnInfo, BasicTxnInfo.thrift_spec)), None, ), # 16 + (17, TType.I32, 'bucketingVersion', None, 1, ), # 17 + (18, TType.BOOL, 'expertMode', None, False, ), # 18 ) - def __init__(self, tableName=None, dbName=None, owner=None, createTime=None, lastAccessTime=None, retention=None, sd=None, partitionKeys=None, parameters=None, viewOriginalText=None, viewExpandedText=None, tableType=None, privileges=None, 
temporary=thrift_spec[14][4], rewriteEnabled=None, creationMetadata=None,): + def __init__(self, tableName=None, dbName=None, owner=None, createTime=None, lastAccessTime=None, retention=None, sd=None, partitionKeys=None, parameters=None, viewOriginalText=None, viewExpandedText=None, tableType=None, privileges=None, temporary=thrift_spec[14][4], rewriteEnabled=None, creationMetadata=None, bucketingVersion=thrift_spec[17][4], expertMode=thrift_spec[18][4],): self.tableName = tableName self.dbName = dbName self.owner = owner @@ -3507,6 +3511,8 @@ def __init__(self, tableName=None, dbName=None, owner=None, createTime=None, las self.temporary = temporary self.rewriteEnabled = rewriteEnabled self.creationMetadata = creationMetadata + self.bucketingVersion = bucketingVersion + self.expertMode = expertMode def read(self, iprot): if iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None and fastbinary is not None: @@ -3618,6 +3624,16 @@ def read(self, iprot): iprot.readMapEnd() else: iprot.skip(ftype) + elif fid == 17: + if ftype == TType.I32: + self.bucketingVersion = iprot.readI32() + else: + iprot.skip(ftype) + elif fid == 18: + if ftype == TType.BOOL: + self.expertMode = iprot.readBool() + else: + iprot.skip(ftype) else: iprot.skip(ftype) iprot.readFieldEnd() @@ -3703,6 +3719,14 @@ def write(self, oprot): viter192.write(oprot) oprot.writeMapEnd() oprot.writeFieldEnd() + if self.bucketingVersion is not None: + oprot.writeFieldBegin('bucketingVersion', TType.I32, 17) + oprot.writeI32(self.bucketingVersion) + oprot.writeFieldEnd() + if self.expertMode is not None: + oprot.writeFieldBegin('expertMode', TType.BOOL, 18) + oprot.writeBool(self.expertMode) + oprot.writeFieldEnd() oprot.writeFieldStop() oprot.writeStructEnd() @@ -3728,6 +3752,8 @@ def __hash__(self): value = (value * 31) ^ hash(self.temporary) value = (value * 31) ^ hash(self.rewriteEnabled) value = (value * 31) ^ hash(self.creationMetadata) + value = (value * 31) ^ hash(self.bucketingVersion) + value = (value * 31) ^ hash(self.expertMode) return value def __repr__(self): diff --git a/standalone-metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb b/standalone-metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb index 3a11a0582a..8b013b2984 100644 --- a/standalone-metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb +++ b/standalone-metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb @@ -810,6 +810,8 @@ class Table TEMPORARY = 14 REWRITEENABLED = 15 CREATIONMETADATA = 16 + BUCKETINGVERSION = 17 + EXPERTMODE = 18 FIELDS = { TABLENAME => {:type => ::Thrift::Types::STRING, :name => 'tableName'}, @@ -827,7 +829,9 @@ class Table PRIVILEGES => {:type => ::Thrift::Types::STRUCT, :name => 'privileges', :class => ::PrincipalPrivilegeSet, :optional => true}, TEMPORARY => {:type => ::Thrift::Types::BOOL, :name => 'temporary', :default => false, :optional => true}, REWRITEENABLED => {:type => ::Thrift::Types::BOOL, :name => 'rewriteEnabled', :optional => true}, - CREATIONMETADATA => {:type => ::Thrift::Types::MAP, :name => 'creationMetadata', :key => {:type => ::Thrift::Types::STRING}, :value => {:type => ::Thrift::Types::STRUCT, :class => ::BasicTxnInfo}, :optional => true} + CREATIONMETADATA => {:type => ::Thrift::Types::MAP, :name => 'creationMetadata', :key => {:type => ::Thrift::Types::STRING}, :value => {:type => ::Thrift::Types::STRUCT, :class => ::BasicTxnInfo}, :optional => true}, + BUCKETINGVERSION => {:type => 
::Thrift::Types::I32, :name => 'bucketingVersion', :default => 1, :optional => true},
+    EXPERTMODE => {:type => ::Thrift::Types::BOOL, :name => 'expertMode', :default => false, :optional => true}
   }

   def struct_fields; FIELDS; end

diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
index b3d99a1da5..a5206c91d0 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
@@ -1638,6 +1638,8 @@ private Table convertToTable(MTable mtbl) throws MetaException {
         mtbl.getViewOriginalText(), mtbl.getViewExpandedText(), tableType);
     t.setCreationMetadata(convertToCreationMetadata(mtbl.getCreationMetadata()));
     t.setRewriteEnabled(mtbl.isRewriteEnabled());
+    t.setBucketingVersion(mtbl.getBucketingVersion());
+    t.setExpertMode(mtbl.isExpertMode());
     return t;
   }

@@ -1676,7 +1678,8 @@ private MTable convertToMTable(Table tbl) throws InvalidObjectException,
         .getCreateTime(), tbl.getLastAccessTime(), tbl.getRetention(),
         convertToMFieldSchemas(tbl.getPartitionKeys()), tbl.getParameters(),
         tbl.getViewOriginalText(), tbl.getViewExpandedText(), tbl.isRewriteEnabled(),
-        convertToMCreationMetadata(tbl.getCreationMetadata()), tableType);
+        convertToMCreationMetadata(tbl.getCreationMetadata()), tableType,
+        tbl.getBucketingVersion(), tbl.isExpertMode());
   }

   private List convertToMFieldSchemas(List keys) {
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/model/MTable.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/model/MTable.java
index 6c40ae8753..7c7e753d88 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/model/MTable.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/model/MTable.java
@@ -37,6 +37,8 @@ private boolean rewriteEnabled;
   private Map creationMetadata;
   private String tableType;
+  private int bucketingVersion;
+  private boolean expertMode;

   public MTable() {}

@@ -58,7 +60,7 @@ public MTable(String tableName, MDatabase database, MStorageDescriptor sd, Strin
       int createTime, int lastAccessTime, int retention, List partitionKeys,
       Map parameters, String viewOriginalText, String viewExpandedText,
       boolean rewriteEnabled, Map creationMetadata,
-      String tableType) {
+      String tableType, int bucketingVersion, boolean expertMode) {
     this.tableName = tableName;
     this.database = database;
     this.sd = sd;
@@ -73,6 +75,8 @@ public MTable(String tableName, MDatabase database, MStorageDescriptor sd, Strin
     this.rewriteEnabled = rewriteEnabled;
     this.creationMetadata = creationMetadata;
     this.tableType = tableType;
+    this.bucketingVersion = bucketingVersion;
+    this.expertMode = expertMode;
   }

   /**
@@ -270,4 +274,32 @@ public void setTableType(String tableType) {
   public String getTableType() {
     return tableType;
   }
+
+  /**
+   * @param bucketingVersion the bucketing version used by this bucketed table
+   */
+  public void setBucketingVersion(int bucketingVersion) {
+    this.bucketingVersion = bucketingVersion;
+  }
+
+  /**
+   * @return the bucketingVersion
+   */
+  public int getBucketingVersion() {
+    return bucketingVersion;
+  }
+
+  /**
+   * @param expertMode true if the table data was loaded via the "load data" command
+   */
+  public void setExpertMode(boolean expertMode) {
+    this.expertMode = expertMode;
+  }
+
+  /**
+   * @return the expertMode
+   */
+  public boolean isExpertMode() {
+    return expertMode;
+  }
 }
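Note: the following is a minimal usage sketch of the two new optional fields, mirroring the metastore client calls in the test changes that follow. A fully populated Table named "table" and an IMetaStoreClient (or equivalent) named "client" are assumptions; only the Table accessors themselves come from this patch.

    // Sketch only: "table" is a fully populated metastore Table, "client" a metastore client.
    table.setBucketingVersion(2);   // newly created bucketed tables are expected to use version 2
    table.setExpertMode(false);     // true only when data is loaded through the "load data" command
    client.createTable(table);

    // Both fields are optional, so readers should consult the isSet* flags before using them.
    Table fetched = client.getTable(table.getDbName(), table.getTableName());
    int bucketingVersion = fetched.isSetBucketingVersion() ? fetched.getBucketingVersion() : 1;
    boolean expertMode = fetched.isSetExpertMode() && fetched.isExpertMode();
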
diff --git a/standalone-metastore/src/main/thrift/hive_metastore.thrift b/standalone-metastore/src/main/thrift/hive_metastore.thrift
index 93f3e53de2..5937d8661f 100644
--- a/standalone-metastore/src/main/thrift/hive_metastore.thrift
+++ b/standalone-metastore/src/main/thrift/hive_metastore.thrift
@@ -327,7 +327,9 @@ struct Table {
   13: optional PrincipalPrivilegeSet privileges,
   14: optional bool temporary=false,
   15: optional bool rewriteEnabled,     // rewrite enabled or not
-  16: optional map creationMetadata   // only for MVs, it stores table name used -> last modification before MV creation
+  16: optional map creationMetadata,  // only for MVs, it stores table name used -> last modification before MV creation
+  17: optional i32 bucketingVersion = 1, // For bucketed tables only. Default is 1; existing tables keep version 1, newly created tables should use 2.
+  18: optional bool expertMode = false   // For bucketed tables only. Default is false; set to true when the user loads data with the "load data" command.
 }

 struct Partition {
diff --git a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/client/TestTablesCreateDropAlterTruncate.java b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/client/TestTablesCreateDropAlterTruncate.java
index abc400a928..72cc42787e 100644
--- a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/client/TestTablesCreateDropAlterTruncate.java
+++ b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/client/TestTablesCreateDropAlterTruncate.java
@@ -224,6 +224,9 @@ public void tearDown() throws Exception {
   public void testCreateGetDeleteTable() throws Exception {
     // Try to create a table with all of the parameters set
     Table table = getTableWithAllParametersSet();
+    // Explicitly set the newly added optional parameters
+    table.setBucketingVersion(2);
+    table.setExpertMode(false);
     client.createTable(table);
     Table createdTable = client.getTable(table.getDbName(), table.getTableName());
     // The createTime will be set on the server side, so the comparison should skip it
@@ -684,6 +687,9 @@ public void testAlterTable() throws Exception {
     // Partition keys can not be set, but getTableWithAllParametersSet is added one, so remove for
     // this test
     newTable.setPartitionKeys(originalTable.getPartitionKeys());
+    // Set the optional bucketingVersion and expertMode to their default values
+    newTable.setBucketingVersion(1);
+    newTable.setExpertMode(false);
     client.alter_table(originalDatabase, originalTableName, newTable);
     Table alteredTable = client.getTable(originalDatabase, originalTableName);