diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index b7d3e99e1a..e964ae3703 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1596,6 +1596,10 @@ private static void populateLlapDaemonVarsSet(Set<String> llapDaemonVarsSetLocal "and bucketed table with few files (say 10 files) are being joined with a very small sorted and bucketed table\n" + "with few files (10 files), the sort-merge join will only use 10 mappers, and a simple map-only join might be faster\n" + "if the complete small table can fit in memory, and a map-join can be performed."), + HIVE_LOAD_DATA_BUCKETED_TABLE_EXPERT_MODE( + "hive.load.data.bucketed.table.expert.mode", false, + "If a user wants to load data into a bucketed table, this must be\n" + + "set to true; otherwise an exception is thrown."), HIVESCRIPTOPERATORTRUST("hive.exec.script.trust", false, ""), HIVEROWOFFSET("hive.exec.rowoffset", false,
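For context on the new flag: it defaults to false, so a LOAD DATA into a bucketed table fails fast unless the user explicitly opts in (the session-level equivalent is SET hive.load.data.bucketed.table.expert.mode=true;). A minimal sketch of the gate, using a hypothetical helper class around the HiveConf API; the actual check lives in LoadSemanticAnalyzer further down in this patch:

    import org.apache.hadoop.hive.conf.HiveConf;

    public class ExpertModeGate {
      // Mirrors the semantic-analyzer check: bucketed tables may only be
      // loaded directly when the expert-mode flag has been set to true.
      static void checkLoadAllowed(HiveConf conf, boolean tableIsBucketed) {
        if (tableIsBucketed && !conf.getBoolVar(
            HiveConf.ConfVars.HIVE_LOAD_DATA_BUCKETED_TABLE_EXPERT_MODE)) {
          throw new IllegalStateException(
              "Set hive.load.data.bucketed.table.expert.mode=true"
              + " to load data into a bucketed table.");
        }
      }
    }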
diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/common/TestHCatUtil.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/common/TestHCatUtil.java index 91aa4fa269..9fe3ce59b6 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/common/TestHCatUtil.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/common/TestHCatUtil.java @@ -125,7 +125,7 @@ public void testGetTableSchemaWithPtnColsApi() throws IOException { org.apache.hadoop.hive.metastore.api.Table apiTable = new org.apache.hadoop.hive.metastore.api.Table("test_tblname", "test_dbname", "test_owner", 0, 0, 0, sd, new ArrayList<FieldSchema>(), new HashMap<String, String>(), - "viewOriginalText", "viewExpandedText", TableType.EXTERNAL_TABLE.name()); + "viewOriginalText", "viewExpandedText", TableType.EXTERNAL_TABLE.name(), 2, false); Table table = new Table(apiTable); List<HCatFieldSchema> expectedHCatSchema = @@ -170,7 +170,7 @@ public void testGetTableSchemaWithPtnColsSerDeReportedFields() throws IOExceptio org.apache.hadoop.hive.metastore.api.Table apiTable = new org.apache.hadoop.hive.metastore.api.Table("test_tblname", "test_dbname", "test_owner", 0, 0, 0, sd, new ArrayList<FieldSchema>(), new HashMap<String, String>(), - "viewOriginalText", "viewExpandedText", TableType.EXTERNAL_TABLE.name()); + "viewOriginalText", "viewExpandedText", TableType.EXTERNAL_TABLE.name(), 2, false); Table table = new Table(apiTable); List<HCatFieldSchema> expectedHCatSchema = Lists.newArrayList( diff --git a/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/TestDbNotificationListener.java b/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/TestDbNotificationListener.java index 9614114083..372ee19367 100644 --- a/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/TestDbNotificationListener.java +++ b/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/TestDbNotificationListener.java @@ -386,7 +386,7 @@ public void createTable() throws Exception { emptyParameters); Table table = new Table(tblName, defaultDbName, tblOwner, startTime, startTime, 0, sd, null, - emptyParameters, null, null, TableType.MANAGED_TABLE.toString()); + emptyParameters, null, null, TableType.MANAGED_TABLE.toString(), 2, false); msClient.createTable(table); // Get notifications from metastore @@ -413,7 +413,7 @@ public void createTable() throws Exception { // a failed event should not create a new notification table = new Table(tblName2, defaultDbName, tblOwner, startTime, startTime, 0, sd, null, - emptyParameters, null, null, null); + emptyParameters, null, null, null, 2, false); DummyRawStoreFailEvent.setEventSucceed(false); try { msClient.createTable(table); @@ -441,14 +441,14 @@ public void alterTable() throws Exception { emptyParameters); Table table = new Table(tblName, defaultDbName, tblOwner, startTime, startTime, 0, sd, - new ArrayList<FieldSchema>(), emptyParameters, null, null, null); + new ArrayList<FieldSchema>(), emptyParameters, null, null, null, 2, false); // Event 1 msClient.createTable(table); cols.add(col2); table = new Table(tblName, defaultDbName, tblOwner, startTime, startTime, 0, sd, - new ArrayList<FieldSchema>(), emptyParameters, null, null, null); + new ArrayList<FieldSchema>(), emptyParameters, null, null, null, 2, false); // Event 2 msClient.alter_table(defaultDbName, tblName, table); @@ -498,7 +498,7 @@ public void dropTable() throws Exception { emptyParameters); Table table = new Table(tblName, defaultDbName, tblOwner, startTime, startTime, 0, sd, null, - emptyParameters, null, null, null); + emptyParameters, null, null, null, 2, false); // Event 1 msClient.createTable(table); @@ -535,7 +535,7 @@ public void dropTable() throws Exception { // a failed event should not create a new notification table = new Table(tblName2, defaultDbName, tblOwner, startTime, startTime, 0, sd, null, - emptyParameters, null, null, null); + emptyParameters, null, null, null, 2, false); msClient.createTable(table); DummyRawStoreFailEvent.setEventSucceed(false); try { @@ -568,7 +568,7 @@ public void addPartition() throws Exception { partCols.add(partCol1); Table table = new Table(tblName, defaultDbName, tblOwner, startTime, startTime, 0, sd, partCols, - emptyParameters, null, null, null); + emptyParameters, null, null, null, 2, false); // Event 1 msClient.createTable(table); @@ -636,7 +636,7 @@ public void alterPartition() throws Exception { partCols.add(partCol1); Table table = new Table(tblName, defaultDbName, tblOwner, startTime, startTime, 0, sd, partCols, - emptyParameters, null, null, null); + emptyParameters, null, null, null, 2, false); // Event 1 msClient.createTable(table); @@ -704,7 +704,7 @@ public void dropPartition() throws Exception { partCols.add(partCol1); Table table = new Table(tblName, defaultDbName, tblOwner, startTime, startTime, 0, sd, partCols, - emptyParameters, null, null, null); + emptyParameters, null, null, null, 2, false); // Event 1 msClient.createTable(table); @@ -770,7 +770,7 @@ public void exchangePartition() throws Exception { StorageDescriptor sd1 = new StorageDescriptor(cols, "file:/tmp/1", "input", "output", false, 0, serde, null, null, emptyParameters); Table tab1 = new Table("tab1", dbName, "me", startTime, startTime, 0, sd1, partCols, - emptyParameters, null, null, null); + emptyParameters, null, null, null, 2, false); msClient.createTable(tab1); NotificationEventResponse rsp = msClient.getNextNotification(firstEventId, 0, null); assertEquals(1, rsp.getEventsSize()); // add_table @@ -778,7 +778,7 @@ StorageDescriptor sd2 = new StorageDescriptor(cols, "file:/tmp/2", "input", "output", false, 0, serde, null, null, emptyParameters); Table tab2 = new Table("tab2", dbName, "me", startTime, startTime, 0, sd2, partCols, - emptyParameters, null, null, null); // add_table + emptyParameters, null, null, null, 2, false); // add_table msClient.createTable(tab2); rsp = msClient.getNextNotification(firstEventId + 1, 0, null); assertEquals(1, rsp.getEventsSize()); @@ -978,7 +978,7 @@ public void createIndex() throws Exception { Arrays.asList("bucketcol"), Arrays.asList(new Order("sortcol",
1)), params); Table table = new Table(tableName, dbName, "me", startTime, startTime, 0, sd, null, emptyParameters, - null, null, null); + null, null, null, 2, false); // Event 1 msClient.createTable(table); Index index = @@ -986,7 +986,7 @@ public void createIndex() throws Exception { emptyParameters, false); Table indexTable = new Table(indexTableName, dbName, "me", startTime, startTime, 0, sd, null, emptyParameters, - null, null, null); + null, null, null, 2, false); // Event 2, 3 (index table and index) msClient.createIndex(index, indexTable); @@ -1021,7 +1021,7 @@ public void createIndex() throws Exception { "createIndexTable2__createIndexTable2__", sd, emptyParameters, false); Table indexTable2 = new Table("createIndexTable2__createIndexTable2__", dbName, "me", startTime, startTime, 0, - sd, null, emptyParameters, null, null, null); + sd, null, emptyParameters, null, null, null, 2, false); try { msClient.createIndex(index, indexTable2); fail("Error: create index should've failed"); @@ -1049,7 +1049,7 @@ public void dropIndex() throws Exception { Arrays.asList("bucketcol"), Arrays.asList(new Order("sortcol", 1)), params); Table table = new Table(tableName, dbName, "me", startTime, startTime, 0, sd, null, emptyParameters, - null, null, null); + null, null, null, 2, false); // Event 1 msClient.createTable(table); Index index = @@ -1057,7 +1057,7 @@ public void dropIndex() throws Exception { emptyParameters, false); Table indexTable = new Table(indexTableName, dbName, "me", startTime, startTime, 0, sd, null, emptyParameters, - null, null, null); + null, null, null, 2, false); // Event 2, 3 (index table and index) msClient.createIndex(index, indexTable); // Event 4 (drops index and indexTable) @@ -1092,7 +1092,7 @@ public void dropIndex() throws Exception { "dropIndexTable__dropIndexTable2__", sd, emptyParameters, false); Table indexTable2 = new Table("dropIndexTable__dropIndexTable2__", dbName, "me", startTime, startTime, 0, sd, - null, emptyParameters, null, null, null); + null, emptyParameters, null, null, null, 2, false); msClient.createIndex(index, indexTable2); DummyRawStoreFailEvent.setEventSucceed(false); try { @@ -1124,7 +1124,7 @@ public void alterIndex() throws Exception { Arrays.asList("bucketcol"), Arrays.asList(new Order("sortcol", 1)), params); Table table = new Table(tableName, dbName, "me", startTime, startTime, 0, sd, null, emptyParameters, - null, null, null); + null, null, null, 2, false); // Event 1 msClient.createTable(table); Index oldIndex = @@ -1132,7 +1132,7 @@ public void alterIndex() throws Exception { emptyParameters, false); Table oldIndexTable = new Table(indexTableName, dbName, "me", startTime, startTime, 0, sd, null, emptyParameters, - null, null, null); + null, null, null, 2, false); // Event 2, 3 msClient.createIndex(oldIndex, oldIndexTable); // creates index and index table Index newIndex = @@ -1195,7 +1195,7 @@ public void insertTable() throws Exception { emptyParameters); Table table = new Table(tblName, defaultDbName, tblOwner, startTime, startTime, 0, sd, null, - emptyParameters, null, null, null); + emptyParameters, null, null, null, 2, false); // Event 1 msClient.createTable(table); @@ -1257,7 +1257,7 @@ public void insertPartition() throws Exception { partCols.add(partCol1); Table table = new Table(tblName, defaultDbName, tblOwner, startTime, startTime, 0, sd, partCols, - emptyParameters, null, null, null); + emptyParameters, null, null, null, 2, false); // Event 1 msClient.createTable(table); Partition partition =
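A note on the recurring mechanical change in these tests: the patch appends two arguments to the metastore api.Table constructor, a bucketing version (2 for newly created tables) and a boolean expert-mode flag (false by default), matching the t.setBucketingVersion(2)/t.setExpertMode(false) defaults applied in ql's Table.setTTable later in this patch. A hedged sketch of constructing a Table against the patched thrift API (signature per this diff, not a stable public API):

    import java.util.ArrayList;
    import java.util.HashMap;
    import org.apache.hadoop.hive.metastore.TableType;
    import org.apache.hadoop.hive.metastore.api.FieldSchema;
    import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
    import org.apache.hadoop.hive.metastore.api.Table;

    public class PatchedTableCtor {
      static Table newManagedTable(StorageDescriptor sd) {
        // The two trailing arguments are the fields added by this patch:
        // bucketing version 2 and expert mode off.
        return new Table("tbl", "db", "owner", 0, 0, 0, sd,
            new ArrayList<FieldSchema>(), new HashMap<String, String>(),
            null, null, TableType.MANAGED_TABLE.toString(), 2, false);
      }
    }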
diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestReplChangeManager.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestReplChangeManager.java index 6ade76d0c2..fbb9171c8f 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestReplChangeManager.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestReplChangeManager.java @@ -139,7 +139,7 @@ public void testRecyclePartTable() throws Exception { false, 0, serdeInfo, null, null, null); Map<String, String> tableParameters = new HashMap<String, String>(); - Table tbl = new Table(tblName, dbName, "", 0, 0, 0, sd, partColumns, tableParameters, "", "", ""); + Table tbl = new Table(tblName, dbName, "", 0, 0, 0, sd, partColumns, tableParameters, "", "", "", 2, false); client.createTable(tbl); @@ -221,7 +221,7 @@ public void testRecycleNonPartTable() throws Exception { false, 0, serdeInfo, null, null, null); Map<String, String> tableParameters = new HashMap<String, String>(); - Table tbl = new Table(tblName, dbName, "", 0, 0, 0, sd, null, tableParameters, "", "", ""); + Table tbl = new Table(tblName, dbName, "", 0, 0, 0, sd, null, tableParameters, "", "", "", 2, false); client.createTable(tbl); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/CustomPartitionVertex.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/CustomPartitionVertex.java index 26afe90faa..911f3f20cd 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/CustomPartitionVertex.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/CustomPartitionVertex.java @@ -108,14 +108,15 @@ public int compare(InputSplit inp1, InputSplit inp2) { private final Multimap<Integer, Integer> bucketToTaskMap = HashMultimap.<Integer, Integer> create(); private final Map<String, Multimap<Integer, InputSplit>> inputToGroupedSplitMap = - new HashMap<String, Multimap<Integer, InputSplit>>(); + new HashMap<>(); private int numInputsAffectingRootInputSpecUpdate = 1; private int numInputsSeenSoFar = 0; private final Map<String, EdgeManagerPluginDescriptor> emMap = Maps.newHashMap(); private final List<InputSplit> finalSplits = Lists.newLinkedList(); private final Map<String, InputSpecUpdate> inputNameInputSpecMap = - new HashMap<>(); + new HashMap<>(); + private Map<String, Integer> inputToBucketMap; public CustomPartitionVertex(VertexManagerPluginContext context) { super(context); @@ -137,6 +138,7 @@ public void initialize() { this.mainWorkName = vertexConf.getInputName(); this.vertexType = vertexConf.getVertexType(); this.numInputsAffectingRootInputSpecUpdate = vertexConf.getNumInputs(); + this.inputToBucketMap = vertexConf.getInputToBucketMap(); } @Override @@ -242,7 +244,7 @@ public void onRootVertexInitialized(String inputName, InputDescriptor inputDescr } Multimap<Integer, InputSplit> bucketToInitialSplitMap = - getBucketSplitMapForPath(pathFileSplitsMap); + getBucketSplitMapForPath(inputName, pathFileSplitsMap); try { int totalResource = context.getTotalAvailableResource().getMemory(); @@ -532,20 +534,44 @@ private FileSplit getFileSplitFromEvent(InputDataInformationEvent event) throws /* * This method generates the map of bucket to file splits. */ - private Multimap<Integer, InputSplit> getBucketSplitMapForPath( + private Multimap<Integer, InputSplit> getBucketSplitMapForPath(String inputName, Map<String, List<FileSplit>> pathFileSplitsMap) { - int bucketNum = 0; Multimap<Integer, InputSplit> bucketToInitialSplitMap = - ArrayListMultimap.<Integer, InputSplit> create(); + ArrayListMultimap.create(); + boolean fallback = false; + List<Integer> bucketIds = new ArrayList<>(); for (Map.Entry<String, List<FileSplit>> entry : pathFileSplitsMap.entrySet()) { - int bucketId = bucketNum % numBuckets; + // Extract the bucketId from pathFileSplitsMap. This is the more accurate + // method; however, it may not work in certain cases where bucket files are + // named after the files used while loading the data. In such cases, fall + // back to the old, potentially inaccurate method. + String bucketStr = entry.getKey().substring(0, entry.getKey().length() - 2); + LOG.info("BucketStr = " + bucketStr); + int bucketId = -1; + try { + bucketId = Integer.parseInt(bucketStr); + } catch (NumberFormatException e) { + fallback = true; + bucketIds.clear(); + break; + } + bucketIds.add(bucketId); for (FileSplit fsplit : entry.getValue()) { bucketToInitialSplitMap.put(bucketId, fsplit); } - bucketNum++; + } + + int bucketNum = 0; + if (fallback) { + for (Map.Entry<String, List<FileSplit>> entry : pathFileSplitsMap.entrySet()) { + for (FileSplit fsplit : entry.getValue()) { + bucketToInitialSplitMap.put(bucketNum, fsplit); + } + bucketNum++; + } } // this is just for SMB join use-case. The numBuckets would be equal to that of the big table @@ -553,16 +579,28 @@ private FileSplit getFileSplitFromEvent(InputDataInformationEvent event) throws // data from the right buckets to the big table side. For e.g. Big table has 8 buckets and small // table has 4 buckets, bucket 0 of small table needs to be sent to bucket 4 of the big table as // well. - if (bucketNum < numBuckets) { - int loopedBucketId = 0; - for (; bucketNum < numBuckets; bucketNum++) { - for (InputSplit fsplit : bucketToInitialSplitMap.get(loopedBucketId)) { - bucketToInitialSplitMap.put(bucketNum, fsplit); + if (numInputsAffectingRootInputSpecUpdate != 1 && + inputName.compareTo(mainWorkName) != 0) { + // small table + int inputNumBuckets = inputToBucketMap.get(inputName); + if (fallback && bucketNum != inputNumBuckets) { + // The fallback mechanism kicked in; it only works correctly when there + // is exactly one file per bucket, else it may produce wrong results. + // Throw an error instead of silently building a bad mapping. + throw new RuntimeException("Could not map bucket files to buckets for input " + + inputName + ": expected " + inputNumBuckets + " files, found " + bucketNum); + } + if (inputNumBuckets < numBuckets) { + // Need to send the splits to multiple buckets + for (int i = 1; i < numBuckets / inputNumBuckets; i++) { + int bucketIdBase = i * inputNumBuckets; + for (Integer bucketId : bucketIds) { + for (InputSplit fsplit : bucketToInitialSplitMap.get(bucketId)) { + bucketToInitialSplitMap.put(bucketIdBase + bucketId, fsplit); + } + } } - loopedBucketId++; } } - return bucketToInitialSplitMap; } }
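To make the two paths above concrete: the primary path parses the bucket id out of file names like 000001_0 (stripping the trailing two-character copy suffix), and the SMB branch then replicates each small-table bucket's splits to every big-table bucket that hashes onto it. A self-contained sketch (hypothetical class, not part of the patch) illustrating both; with 8 big-table buckets and 4 small-table buckets, small bucket 0 also feeds big bucket 4, matching the comment in the code:

    import java.util.ArrayList;
    import java.util.List;

    public class BucketMapping {
      // Parse the bucket id from names like "000001_0" by stripping the last
      // two characters; -1 signals "name does not follow the convention",
      // which triggers the fallback enumeration path.
      static int parseBucketId(String fileName) {
        try {
          return Integer.parseInt(fileName.substring(0, fileName.length() - 2));
        } catch (NumberFormatException | StringIndexOutOfBoundsException e) {
          return -1;
        }
      }

      public static void main(String[] args) {
        System.out.println(parseBucketId("000001_0")); // 1
        System.out.println(parseBucketId("data.txt")); // -1 -> fallback

        // Small table with 4 buckets joined to a big table with 8 buckets:
        // small bucket b must also feed big bucket i * 4 + b for i >= 1.
        int numBuckets = 8, inputNumBuckets = 4;
        for (int b = 0; b < inputNumBuckets; b++) {
          List<Integer> targets = new ArrayList<>();
          targets.add(b);
          for (int i = 1; i < numBuckets / inputNumBuckets; i++) {
            targets.add(i * inputNumBuckets + b);
          }
          System.out.println("small bucket " + b + " -> big buckets " + targets);
        }
      }
    }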
public CustomVertexConfiguration(int numBuckets, VertexType vertexType, String inputName, - int numInputs) { + int numInputs, Map<String, Integer> inputToBucketMap) { this.numBuckets = numBuckets; this.vertexType = vertexType; this.numInputs = numInputs; this.inputName = inputName; + this.inputToBucketMap = inputToBucketMap; } @Override @@ -63,6 +67,14 @@ public void write(DataOutput out) throws IOException { out.writeInt(this.numBuckets); out.writeInt(numInputs); out.writeUTF(inputName); + int sz = inputToBucketMap != null ? inputToBucketMap.size() : 0; + out.writeInt(sz); + if (sz > 0) { + for (Map.Entry<String, Integer> entry : inputToBucketMap.entrySet()) { + out.writeUTF(entry.getKey()); + out.writeInt(entry.getValue()); + } + } } @Override @@ -71,6 +83,13 @@ public void readFields(DataInput in) throws IOException { this.numBuckets = in.readInt(); this.numInputs = in.readInt(); this.inputName = in.readUTF(); + int sz = in.readInt(); + if (sz > 0) { + this.inputToBucketMap = new HashMap<>(); + for (int i = 0; i < sz; i++) { + this.inputToBucketMap.put(in.readUTF(), in.readInt()); + } + } } public int getNumBuckets() { @@ -88,4 +107,8 @@ public String getInputName() { public int getNumInputs() { return numInputs; } + + public Map<String, Integer> getInputToBucketMap() { + return inputToBucketMap; + } }
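In the vertex manager's user payload, the map travels as a count-prefixed list of (UTF string key, int value) pairs, with a count of zero standing in for a null map. A self-contained round-trip sketch of that wire format (plain java.io streams standing in for Tez's DataOutputBuffer; hypothetical class name):

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.DataInput;
    import java.io.DataInputStream;
    import java.io.DataOutput;
    import java.io.DataOutputStream;
    import java.io.IOException;
    import java.util.HashMap;
    import java.util.Map;

    public class MapWireFormat {
      static void write(DataOutput out, Map<String, Integer> m) throws IOException {
        int sz = m != null ? m.size() : 0;
        out.writeInt(sz); // count prefix; 0 encodes "no map"
        if (sz > 0) {
          for (Map.Entry<String, Integer> e : m.entrySet()) {
            out.writeUTF(e.getKey());   // input (work) name
            out.writeInt(e.getValue()); // its bucket count
          }
        }
      }

      static Map<String, Integer> read(DataInput in) throws IOException {
        int sz = in.readInt();
        if (sz == 0) {
          return null; // mirrors the patch: the field simply stays null
        }
        Map<String, Integer> m = new HashMap<>();
        for (int i = 0; i < sz; i++) {
          m.put(in.readUTF(), in.readInt());
        }
        return m;
      }

      public static void main(String[] args) throws IOException {
        Map<String, Integer> in = new HashMap<>();
        in.put("Map 1", 4);
        in.put("Map 2", 2);
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        write(new DataOutputStream(bos), in);
        System.out.println(read(new DataInputStream(
            new ByteArrayInputStream(bos.toByteArray())))); // {Map 1=4, Map 2=2}
      }
    }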
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java index 9885038588..4d7a4bfcde 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java @@ -568,13 +568,28 @@ private Vertex createVertex(JobConf conf, MergeJoinWork mergeJoinWork, FileSyste MultiMRInput.createConfigBuilder(conf, HiveInputFormat.class).build()); } + // To be populated for SMB joins only, for all the small tables + Map<String, Integer> inputToBucketMap = new HashMap<>(); + if (mergeJoinWork.getMergeJoinOperator().getParentOperators().size() == 1 + && mergeJoinWork.getMergeJoinOperator().getOpTraits() != null) { + // This is an SMB join. + for (BaseWork work : mapWorkList) { + MapWork mw = (MapWork) work; + Map<String, Operator<? extends OperatorDesc>> aliasToWork = mw.getAliasToWork(); + if (aliasToWork.size() > 1) { + LOG.warn("More than 1 alias in SMB mapwork, assert"); + assert false; + } + inputToBucketMap.put(mw.getName(), mw.getWorks().get(0).getOpTraits().getNumBuckets()); + } + } VertexManagerPluginDescriptor desc = VertexManagerPluginDescriptor.create(CustomPartitionVertex.class.getName()); // the +1 to the size is because of the main work. CustomVertexConfiguration vertexConf = new CustomVertexConfiguration(mergeJoinWork.getMergeJoinOperator().getConf() .getNumBuckets(), vertexType, mergeJoinWork.getBigTableAlias(), - mapWorkList.size() + 1); + mapWorkList.size() + 1, inputToBucketMap); DataOutputBuffer dob = new DataOutputBuffer(); vertexConf.write(dob); byte[] userPayload = dob.getData(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java index 9b0ffe0e91..25d1681f9b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java @@ -187,6 +187,8 @@ public void setTTable(org.apache.hadoop.hive.metastore.api.Table tTable) { t.setOwner(SessionState.getUserFromAuthenticator()); // set create time t.setCreateTime((int) (System.currentTimeMillis() / 1000)); + t.setBucketingVersion(2); + t.setExpertMode(false); } return t; } @@ -675,6 +677,10 @@ public int getNumBuckets() { return tTable.getSd().getNumBuckets(); } + public int getBucketingVersion() { + return tTable.getBucketingVersion(); + } + public void setInputFormatClass(String name) throws HiveException { if (name == null) { inputFormatClass = null;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java index dc698c8de8..324b737cf9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java @@ -180,7 +180,8 @@ MapJoinOperator mapJoinOp = convertJoinMapJoin(joinOp, context, mapJoinConversionPos, true); // map join operator by default has no bucket cols and num of reduce sinks // reduced by 1 - mapJoinOp.setOpTraits(new OpTraits(null, -1, null, joinOp.getOpTraits().getNumReduceSinks())); + mapJoinOp.setOpTraits(new OpTraits(null, -1, null, + joinOp.getOpTraits().getNumReduceSinks(), joinOp.getOpTraits().getBucketingVersion())); mapJoinOp.setStatistics(joinOp.getStatistics()); // propagate this change till the next RS for (Operator<? extends OperatorDesc> childOp : mapJoinOp.getChildOperators()) { @@ -378,7 +379,7 @@ private void convertJoinSMBJoin(JoinOperator joinOp, OptimizeTezProcContext cont joinOp.getSchema()); int numReduceSinks = joinOp.getOpTraits().getNumReduceSinks(); OpTraits opTraits = new OpTraits(joinOp.getOpTraits().getBucketColNames(), numBuckets, - joinOp.getOpTraits().getSortCols(), numReduceSinks); + joinOp.getOpTraits().getSortCols(), numReduceSinks, joinOp.getOpTraits().getBucketingVersion()); mergeJoinOp.setOpTraits(opTraits); mergeJoinOp.setStatistics(joinOp.getStatistics()); @@ -445,7 +446,8 @@ private void setAllChildrenTraits(Operator currentOp, Op return; } currentOp.setOpTraits(new OpTraits(opTraits.getBucketColNames(), - opTraits.getNumBuckets(), opTraits.getSortCols(), opTraits.getNumReduceSinks())); + opTraits.getNumBuckets(), opTraits.getSortCols(), opTraits.getNumReduceSinks(), + opTraits.getBucketingVersion())); for (Operator<? extends OperatorDesc> childOp : currentOp.getChildOperators()) { if ((childOp instanceof ReduceSinkOperator) || (childOp instanceof GroupByOperator)) { break; @@ -498,7 +500,8 @@ private boolean convertJoinBucketMapJoin(JoinOperator joinOp, OptimizeTezProcCon // we can set the traits for this join operator opTraits = new OpTraits(joinOp.getOpTraits().getBucketColNames(), - tezBucketJoinProcCtx.getNumBuckets(), null, joinOp.getOpTraits().getNumReduceSinks()); + tezBucketJoinProcCtx.getNumBuckets(), null, + joinOp.getOpTraits().getNumReduceSinks(), joinOp.getOpTraits().getBucketingVersion()); mapJoinOp.setOpTraits(opTraits); mapJoinOp.setStatistics(joinOp.getStatistics()); setNumberOfBucketsOnChildren(mapJoinOp); @@ -576,6 +579,13 @@ private boolean checkConvertJoinSMBJoin(JoinOperator joinOp, OptimizeTezProcCont return false; } ReduceSinkOperator rsOp = (ReduceSinkOperator) parentOp; + // If the chosen big table has fewer buckets than any of the small + // tables, then the extra small-table buckets will have no mapping to + // any of the big table buckets, resulting in wrong results. + if (numBuckets > 0 && numBuckets < rsOp.getOpTraits().getNumBuckets()) { + LOG.info("Small table has more buckets than big table."); + return false; + } if (!checkColEquality(rsOp.getParentOperators().get(0).getOpTraits().getSortCols(), rsOp .getOpTraits().getSortCols(), rsOp.getColumnExprMap(), false)) { LOG.info("We cannot convert to SMB because the sort column names do not match."); return false; } @@ -593,6 +603,36 @@ private boolean checkConvertJoinSMBJoin(JoinOperator joinOp, OptimizeTezProcCont numBuckets = bigTableRS.getConf().getNumReducers(); } tezBucketJoinProcCtx.setNumBuckets(numBuckets); + + // Bucketing can use two different versions: version 1 for existing + // tables and version 2 for new tables. All the inputs to the SMB join + // must come from the same version. This only applies to tables read + // directly, not to intermediate outputs of joins/group-bys. + int version = -1; + for (Operator<? extends OperatorDesc> parentOp : joinOp.getParentOperators()) { + // Check if the parent is coming from a table scan; if so, what is its version. + assert parentOp.getParentOperators() != null && parentOp.getParentOperators().size() == 1; + Operator<? extends OperatorDesc> op = parentOp.getParentOperators().get(0); + while (op != null && !(op instanceof TableScanOperator + || op instanceof ReduceSinkOperator + || op instanceof CommonJoinOperator)) { + // If op has parents, it is guaranteed to have exactly 1. + op = op.getParentOperators().size() > 0 ? + op.getParentOperators().get(0) : null; + } + + if (op instanceof TableScanOperator) { + int localVersion = ((TableScanOperator)op).getConf(). + getTableMetadata().getBucketingVersion(); + if (version == -1) { + version = localVersion; + } else if (version != localVersion) { + // versions don't match, return false. + LOG.info("SMB Join can't be performed due to bucketing version mismatch"); + return false; + } + } + } LOG.info("We can convert the join to an SMB join."); return true; } @@ -1168,7 +1208,8 @@ private boolean convertJoinDynamicPartitionedHashJoin(JoinOperator joinOp, Optim joinOp.getOpTraits().getBucketColNames(), numReducers, null, - joinOp.getOpTraits().getNumReduceSinks()); + joinOp.getOpTraits().getNumReduceSinks(), + joinOp.getOpTraits().getBucketingVersion()); mapJoinOp.setOpTraits(opTraits); mapJoinOp.setStatistics(joinOp.getStatistics()); // propagate this change till the next RS
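The version check above walks each join branch upward from the ReduceSink's parent until it reaches a TableScanOperator, or gives up when it crosses a ReduceSink or join (where bucketing is re-established and the underlying table's version no longer matters). A simplified standalone sketch of that traversal, using stand-in node types rather than Hive's operator classes:

    import java.util.ArrayList;
    import java.util.List;

    class PlanOp {
      final List<PlanOp> parents = new ArrayList<>();
      // Non-null only for "table scan" nodes; 1 or 2 in practice.
      Integer bucketingVersion;
    }

    public class SmbVersionCheck {
      // Returns false when two scan-rooted branches disagree on the version.
      static boolean sameBucketingVersion(List<PlanOp> joinBranches) {
        int version = -1;
        for (PlanOp branch : joinBranches) {
          PlanOp op = branch;
          // Follow the single-parent chain until a table scan is found
          // or the chain ends (intermediate result: no constraint).
          while (op != null && op.bucketingVersion == null) {
            op = op.parents.isEmpty() ? null : op.parents.get(0);
          }
          if (op == null) {
            continue;
          }
          if (version == -1) {
            version = op.bucketingVersion;
          } else if (version != op.bucketingVersion) {
            return false; // mixed version-1 and version-2 inputs
          }
        }
        return true;
      }
    }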
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/OpTraitsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/OpTraitsRulesProcFactory.java index 69d9f3125a..ac2f75a7bc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/OpTraitsRulesProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/OpTraitsRulesProcFactory.java @@ -92,10 +92,12 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, List<List<String>> listBucketCols = new ArrayList<List<String>>(); int numBuckets = -1; int numReduceSinks = 1; + int bucketingVersion = -1; OpTraits parentOpTraits = rs.getParentOperators().get(0).getOpTraits(); if (parentOpTraits != null) { numBuckets = parentOpTraits.getNumBuckets(); numReduceSinks += parentOpTraits.getNumReduceSinks(); + bucketingVersion = parentOpTraits.getBucketingVersion(); } List<String> bucketCols = new ArrayList<>(); @@ -134,7 +136,8 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, } listBucketCols.add(bucketCols); - OpTraits opTraits = new OpTraits(listBucketCols, numBuckets, listBucketCols, numReduceSinks); + OpTraits opTraits = new OpTraits(listBucketCols, numBuckets, + listBucketCols, numReduceSinks, bucketingVersion); rs.setOpTraits(opTraits); return null; } @@ -213,7 +216,8 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, sortedColsList.add(sortCols); } // num reduce sinks hardcoded to 0 because TS has no parents - OpTraits opTraits = new OpTraits(bucketColsList, numBuckets, sortedColsList, 0); + OpTraits opTraits = new OpTraits(bucketColsList, numBuckets, + sortedColsList, 0, table.getBucketingVersion()); ts.setOpTraits(opTraits); return null; } @@ -239,12 +243,15 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, List<List<String>> listBucketCols = new ArrayList<List<String>>(); int numReduceSinks = 0; + int bucketingVersion = -1; OpTraits parentOpTraits = gbyOp.getParentOperators().get(0).getOpTraits(); if (parentOpTraits != null) { numReduceSinks = parentOpTraits.getNumReduceSinks(); + bucketingVersion = parentOpTraits.getBucketingVersion(); } listBucketCols.add(gbyKeys); - OpTraits opTraits = new OpTraits(listBucketCols, -1, listBucketCols, numReduceSinks); + OpTraits opTraits = new OpTraits(listBucketCols, -1, listBucketCols, + numReduceSinks, bucketingVersion); gbyOp.setOpTraits(opTraits); return null; } @@ -298,12 +305,15 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, int numBuckets = -1; int numReduceSinks = 0; + int bucketingVersion = -1; OpTraits parentOpTraits = selOp.getParentOperators().get(0).getOpTraits(); if (parentOpTraits != null) { numBuckets = parentOpTraits.getNumBuckets(); numReduceSinks = parentOpTraits.getNumReduceSinks(); + bucketingVersion = parentOpTraits.getBucketingVersion(); } - OpTraits opTraits = new OpTraits(listBucketCols, numBuckets, listSortCols, numReduceSinks); +
OpTraits opTraits = new OpTraits(listBucketCols, numBuckets, listSortCols, + numReduceSinks, bucketingVersion); selOp.setOpTraits(opTraits); return null; } @@ -319,6 +329,8 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, List<List<String>> sortColsList = new ArrayList<List<String>>(); byte pos = 0; int numReduceSinks = 0; // will be set to the larger of the parents + int bucketingVersion = -1; + boolean bucketingVersionSeen = false; for (Operator<? extends OperatorDesc> parentOp : joinOp.getParentOperators()) { if (!(parentOp instanceof ReduceSinkOperator)) { // can be mux operator @@ -335,10 +347,18 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, if (parentOpTraits.getNumReduceSinks() > numReduceSinks) { numReduceSinks = parentOpTraits.getNumReduceSinks(); } + // If there is a mismatch in bucketingVersion, it should be set to + // -1 so that SMB join conversion is disabled. + if (bucketingVersion == -1 && !bucketingVersionSeen) { + bucketingVersion = parentOpTraits.getBucketingVersion(); + bucketingVersionSeen = true; + } else if (bucketingVersion != parentOpTraits.getBucketingVersion()) { + bucketingVersion = -1; + } pos++; } - joinOp.setOpTraits(new OpTraits(bucketColsList, -1, bucketColsList, numReduceSinks)); + joinOp.setOpTraits(new OpTraits(bucketColsList, -1, bucketColsList, numReduceSinks, bucketingVersion)); return null; } @@ -392,6 +412,8 @@ Operator<? extends OperatorDesc> operator = (Operator<? extends OperatorDesc>) nd; int numReduceSinks = 0; + int bucketingVersion = -1; + boolean bucketingVersionSeen = false; for (Operator<? extends OperatorDesc> parentOp : operator.getParentOperators()) { if (parentOp.getOpTraits() == null) { continue; @@ -399,8 +421,17 @@ if (parentOp.getOpTraits().getNumReduceSinks() > numReduceSinks) { numReduceSinks = parentOp.getOpTraits().getNumReduceSinks(); } + // If there is a mismatch in bucketingVersion, it should be set to + // -1 so that SMB join conversion is disabled.
+ if (bucketingVersion == -1 && !bucketingVersionSeen) { + bucketingVersion = parentOp.getOpTraits().getBucketingVersion(); + bucketingVersionSeen = true; + } else if (bucketingVersion != parentOp.getOpTraits().getBucketingVersion()) { + bucketingVersion = -1; + } } - OpTraits opTraits = new OpTraits(null, -1, null, numReduceSinks); + OpTraits opTraits = new OpTraits(null, -1, + null, numReduceSinks, bucketingVersion); operator.setOpTraits(opTraits); return null; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java index bacc44482a..39d2370435 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java @@ -121,7 +121,8 @@ } // we can set the traits for this join operator - OpTraits opTraits = new OpTraits(bucketColNames, numBuckets, null, joinOp.getOpTraits().getNumReduceSinks()); + OpTraits opTraits = new OpTraits(bucketColNames, numBuckets, null, + joinOp.getOpTraits().getNumReduceSinks(), joinOp.getOpTraits().getBucketingVersion()); mapJoinOp.setOpTraits(opTraits); mapJoinOp.setStatistics(joinOp.getStatistics()); setNumberOfBucketsOnChildren(mapJoinOp);
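All of these annotation rules share one merging convention for bucketingVersion: adopt the first parent's version, then collapse to -1 on any disagreement, and a -1 later keeps checkConvertJoinSMBJoin from converting the join. A tiny standalone sketch (hypothetical helper, not part of the patch) of exactly that rule:

    public class BucketingVersionMerge {
      // Combine parent versions: first one seen wins; any mismatch => -1.
      static int merge(int[] parentVersions) {
        int version = -1;
        boolean seen = false;
        for (int v : parentVersions) {
          if (!seen) {
            version = v;
            seen = true;
          } else if (version != v) {
            return -1; // mismatch: SMB conversion stays off
          }
        }
        return version;
      }

      public static void main(String[] args) {
        System.out.println(merge(new int[] {2, 2})); // 2
        System.out.println(merge(new int[] {1, 2})); // -1
      }
    }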
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java index 5868d4dd56..2be5bdac05 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java @@ -168,6 +168,50 @@ private URI initializeFromURI(String fromPath, boolean isLocal) throws IOExcepti } } } + // Do another loop if the table is bucketed + List<String> bucketCols = table.getBucketCols(); + if (bucketCols != null && !bucketCols.isEmpty()) { + // Hive assumes that the user names the files after their corresponding + // buckets, i.e. file names should follow the format 000000_0, 000001_0, etc. + // Here the 1st file will belong to bucket 0, the 2nd to bucket 1, and so on. + boolean[] bucketArray = new boolean[table.getNumBuckets()]; + // initialize the array + int numBuckets = table.getNumBuckets(); + for (int i = 0; i < numBuckets; i++) { + bucketArray[i] = false; + } + for (FileStatus oneSrc : srcs) { + String bucketName = oneSrc.getPath().getName(); + //get the bucket id + String bucketIdStr = + Utilities.getBucketFileNameFromPathSubString(bucketName); + int bucketId = Utilities.getBucketIdFromFile(bucketIdStr); + LOG.info("bucket ID for file " + oneSrc.getPath() + " = " + bucketId + + " for table " + table.getFullyQualifiedName()); + if (bucketId == -1) { + throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg( + "The file name is invalid : " + + oneSrc.getPath().toString() + " for table " + + table.getFullyQualifiedName())); + } + if (bucketId >= numBuckets) { + throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg( + "The file name corresponds to invalid bucketId : " + + oneSrc.getPath().toString()) + + ". Maximum number of buckets can be " + numBuckets + + " for table " + table.getFullyQualifiedName()); + } + if (bucketArray[bucketId]) { + throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg( + "Multiple files for same bucket : " + bucketId + + ". Only 1 file per bucket allowed in single load command. To load multiple files for same bucket, use multiple statements for table " + + table.getFullyQualifiedName())); + } + bucketArray[bucketId] = true; + } + } } catch (IOException e) { // Has to use full name to make sure it does not conflict with // org.apache.commons.lang.StringUtils @@ -236,6 +280,13 @@ public void analyzeInternal(ASTNode ast) throws SemanticException { throw new SemanticException("Please load into an intermediate table" + " and use 'insert... select' to allow Hive to enforce bucketing. " + error); } + // Check if expert mode is set by the user + if (!conf.getBoolVar(HiveConf.ConfVars.HIVE_LOAD_DATA_BUCKETED_TABLE_EXPERT_MODE)) { + throw new SemanticException("Loading data into a bucketed table is discouraged. " + + "The user is responsible for verifying the sanity of the data. " + + "In order to proceed, please set the flag " + + "hive.load.data.bucketed.table.expert.mode to true."); + } } // make sure the arguments make sense diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/OpTraits.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/OpTraits.java index 9621c3be53..6cf6c31ba8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/OpTraits.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/OpTraits.java @@ -22,17 +22,20 @@ public class OpTraits { - List<List<String>> bucketColNames; - List<List<String>> sortColNames; - int numBuckets; - int numReduceSinks; + private List<List<String>> bucketColNames; + private List<List<String>> sortColNames; + private int numBuckets; + private int numReduceSinks; + private int bucketingVersion; public OpTraits(List<List<String>> bucketColNames, int numBuckets, - List<List<String>> sortColNames, int numReduceSinks) { + List<List<String>> sortColNames, int numReduceSinks, + int bucketingVersion) { this.bucketColNames = bucketColNames; this.numBuckets = numBuckets; this.sortColNames = sortColNames; this.numReduceSinks = numReduceSinks; + this.bucketingVersion = bucketingVersion; } public List<List<String>> getBucketColNames() { @@ -68,6 +71,13 @@ public int getNumReduceSinks() { return this.numReduceSinks; } + public void setBucketingVersion(int bucketingVersion) { + this.bucketingVersion = bucketingVersion; + } + + public int getBucketingVersion() { + return bucketingVersion; + } @Override public String toString() {
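To see the LOAD DATA validation above end to end, here is a standalone sketch (hypothetical class; the real code delegates to Utilities.getBucketFileNameFromPathSubString and Utilities.getBucketIdFromFile) applying the same three rules: the name must parse to a bucket id, the id must lie in [0, numBuckets), and at most one file may target each bucket:

    import java.util.Arrays;
    import java.util.List;

    public class LoadFileCheck {
      static void validate(List<String> fileNames, int numBuckets) {
        boolean[] seen = new boolean[numBuckets];
        for (String name : fileNames) {
          int bucketId;
          try {
            // Simplified stand-in for Utilities.getBucketIdFromFile:
            // parse the digits before the first '_' of e.g. "000001_0".
            bucketId = Integer.parseInt(name.substring(0, name.indexOf('_')));
          } catch (RuntimeException e) {
            throw new IllegalArgumentException("The file name is invalid: " + name);
          }
          if (bucketId < 0 || bucketId >= numBuckets) {
            throw new IllegalArgumentException("File " + name + " maps to bucket "
                + bucketId + " but the table has only " + numBuckets + " buckets");
          }
          if (seen[bucketId]) {
            throw new IllegalArgumentException("Multiple files for bucket " + bucketId
                + "; use one LOAD DATA statement per file");
          }
          seen[bucketId] = true;
        }
      }

      public static void main(String[] args) {
        validate(Arrays.asList("000000_0", "000001_0"), 2); // passes
        try {
          validate(Arrays.asList("000002_0"), 2);
        } catch (IllegalArgumentException e) {
          System.out.println(e.getMessage()); // bucket 2 out of range
        }
      }
    }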
diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_2.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_2.q index e5fdcb57e4..b7bd10eed2 100644 --- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_2.q +++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_2.q @@ -1,19 +1,21 @@ set hive.strict.checks.bucketing=false; set hive.mapred.mode=nonstrict; --- small 1 part, 4 bucket & big 2 part, 2 bucket +-- small 1 part, 2 bucket & big 2 part, 4 bucket -CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; +CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; load data local inpath '../../data/files/auto_sortmerge_join/small/000000_0' INTO TABLE bucket_small partition(ds='2008-04-08'); load data local inpath '../../data/files/auto_sortmerge_join/small/000001_0' INTO TABLE bucket_small partition(ds='2008-04-08'); -load data local inpath '../../data/files/auto_sortmerge_join/small/000002_0' INTO TABLE bucket_small partition(ds='2008-04-08'); -load data local inpath '../../data/files/auto_sortmerge_join/small/000003_0' INTO TABLE bucket_small partition(ds='2008-04-08'); -CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big partition(ds='2008-04-08'); load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_big partition(ds='2008-04-08'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08'); load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big partition(ds='2008-04-09'); load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_big partition(ds='2008-04-09'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-09'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-09'); set hive.auto.convert.join=true; set hive.auto.convert.sortmerge.join=true; diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_4.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_4.q index abf09e5534..9f719aebb5 100644 --- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_4.q +++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_4.q @@ -1,7 +1,7 @@ set hive.strict.checks.bucketing=false; set hive.mapred.mode=nonstrict; --- small 2 part, 4 bucket & big 1 part, 2 bucket +-- small 2 part, 4 bucket & big 1 part, 4 bucket CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; load data local inpath '../../data/files/auto_sortmerge_join/small/000000_0' INTO TABLE bucket_small partition(ds='2008-04-08'); load data local inpath '../../data/files/auto_sortmerge_join/small/000001_0' INTO TABLE bucket_small partition(ds='2008-04-08'); @@ -13,9 +13,11 @@ load data local inpath '../../data/files/auto_sortmerge_join/small/000001_0' INT load data local inpath '../../data/files/auto_sortmerge_join/small/000002_0' INTO TABLE bucket_small partition(ds='2008-04-09'); load data local inpath '../../data/files/auto_sortmerge_join/small/000003_0' INTO TABLE bucket_small partition(ds='2008-04-09'); -CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big partition(ds='2008-04-08'); load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_big partition(ds='2008-04-08'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08'); set hive.auto.convert.join=true; set hive.auto.convert.sortmerge.join=true; diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_5.q
b/ql/src/test/queries/clientpositive/auto_sortmerge_join_5.q index b85c4a7aa3..c107501d0a 100644 --- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_5.q +++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_5.q @@ -1,19 +1,19 @@ set hive.strict.checks.bucketing=false; set hive.mapred.mode=nonstrict; --- small no part, 4 bucket & big no part, 2 bucket +-- small no part, 2 bucket & big no part, 4 bucket -- SORT_QUERY_RESULTS -CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; +CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; load data local inpath '../../data/files/auto_sortmerge_join/small/000000_0' INTO TABLE bucket_small; load data local inpath '../../data/files/auto_sortmerge_join/small/000001_0' INTO TABLE bucket_small; -load data local inpath '../../data/files/auto_sortmerge_join/small/000002_0' INTO TABLE bucket_small; -load data local inpath '../../data/files/auto_sortmerge_join/small/000003_0' INTO TABLE bucket_small; -CREATE TABLE bucket_big (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +CREATE TABLE bucket_big (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big; load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_big; +load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big; +load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big; set hive.auto.convert.sortmerge.join=true; set hive.optimize.bucketmapjoin = true; diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_7.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_7.q index bd780861e3..a5cc04a97f 100644 --- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_7.q +++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_7.q @@ -1,7 +1,7 @@ set hive.strict.checks.bucketing=false; set hive.mapred.mode=nonstrict; --- small 2 part, 4 bucket & big 2 part, 2 bucket +-- small 2 part, 4 bucket & big 2 part, 4 bucket CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; load data local inpath '../../data/files/auto_sortmerge_join/small/000000_0' INTO TABLE bucket_small partition(ds='2008-04-08'); load data local inpath '../../data/files/auto_sortmerge_join/small/000001_0' INTO TABLE bucket_small partition(ds='2008-04-08'); @@ -13,12 +13,16 @@ load data local inpath '../../data/files/auto_sortmerge_join/small/000001_0' INT load data local inpath '../../data/files/auto_sortmerge_join/small/000002_0' INTO TABLE bucket_small partition(ds='2008-04-09'); load data local inpath '../../data/files/auto_sortmerge_join/small/000003_0' INTO TABLE bucket_small partition(ds='2008-04-09'); -CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big partition(ds='2008-04-08'); load data local inpath 
'../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_big partition(ds='2008-04-08'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08'); load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big partition(ds='2008-04-09'); load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_big partition(ds='2008-04-09'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-09'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-09'); set hive.auto.convert.join=true; set hive.auto.convert.sortmerge.join=true; diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_2.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_2.q.out index 5cfc35aa73..dda72115c5 100644 --- a/ql/src/test/results/clientpositive/auto_sortmerge_join_2.q.out +++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_2.q.out @@ -1,8 +1,8 @@ -PREHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_small -POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_small @@ -23,27 +23,11 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/sm POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_small@ds=2008-04-08 -PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000002_0' INTO TABLE bucket_small partition(ds='2008-04-08') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@bucket_small@ds=2008-04-08 -POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000002_0' INTO TABLE bucket_small partition(ds='2008-04-08') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@bucket_small@ds=2008-04-08 -PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000003_0' INTO TABLE bucket_small partition(ds='2008-04-08') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@bucket_small@ds=2008-04-08 -POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000003_0' INTO TABLE bucket_small partition(ds='2008-04-08') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@bucket_small@ds=2008-04-08 -PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: query: 
CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_big -POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_big @@ -64,6 +48,22 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big partition(ds='2008-04-09') PREHOOK: type: LOAD #### A masked pattern was here #### @@ -81,6 +81,22 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-09 PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key @@ -95,16 +111,16 @@ STAGE 
PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -134,7 +150,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -142,7 +158,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -150,7 +166,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -158,7 +174,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -183,7 +199,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -191,7 +207,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -199,7 +215,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -207,7 +223,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -308,7 +324,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -316,7 +332,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_small - numFiles 4 + numFiles 2 numRows 0 partition_columns ds partition_columns.types string @@ -324,7 +340,7 @@ STAGE PLANS: serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -332,7 +348,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - 
bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -353,16 +369,16 @@ STAGE PLANS: $hdt$_1:b TableScan alias: b - Statistics: Num rows: 4 Data size: 2260 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 1140 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 4 Data size: 2260 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 1140 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 2260 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 1140 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 _col0 (type: string) @@ -374,16 +390,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -414,7 +430,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -422,7 +438,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -430,7 +446,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -438,7 +454,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -463,7 +479,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -471,7 +487,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -479,7 +495,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -487,7 +503,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key 
column.name.delimiter , columns key,value @@ -511,7 +527,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -519,7 +535,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_small - numFiles 4 + numFiles 2 numRows 0 partition_columns ds partition_columns.types string @@ -527,7 +543,7 @@ STAGE PLANS: serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -535,7 +551,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -597,7 +613,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -605,7 +621,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -613,7 +629,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -621,7 +637,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -645,7 +661,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -653,7 +669,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -661,7 +677,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -669,7 +685,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -690,16 +706,16 @@ STAGE PLANS: $hdt$_0:a TableScan alias: a - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + 
Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 _col0 (type: string) @@ -711,16 +727,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 4 Data size: 2260 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 1140 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 4 Data size: 2260 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 1140 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 2260 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 1140 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -751,7 +767,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -759,7 +775,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -767,7 +783,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -775,7 +791,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -800,7 +816,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -808,7 +824,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -816,7 +832,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -824,7 +840,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -848,7 +864,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -856,7 +872,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_small - numFiles 4 + numFiles 2 numRows 0 partition_columns ds partition_columns.types string @@ -864,7 +880,7 @@ STAGE PLANS: serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -872,7 +888,7 @@ STAGE 
PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -924,16 +940,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -963,7 +979,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -971,7 +987,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -979,7 +995,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -987,7 +1003,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -1012,7 +1028,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -1020,7 +1036,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -1028,7 +1044,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -1036,7 +1052,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_4.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_4.q.out index 0d586fd26b..b54c574358 100644 --- a/ql/src/test/results/clientpositive/auto_sortmerge_join_4.q.out +++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_4.q.out @@ -72,11 +72,11 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/sm POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_small@ds=2008-04-09 -PREHOOK: query: CREATE TABLE bucket_big (key string, value string) 
partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_big -POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_big @@ -97,6 +97,22 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key @@ -111,16 +127,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -150,7 +166,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -158,7 +174,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -166,7 +182,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} 
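
The hunks above track the revised setup for auto_sortmerge_join_4.q: bucket_big is now declared with four buckets, and the two additional pre-sorted bucket files (000002_0 and 000003_0) are loaded into the ds='2008-04-08' partition so that the file count matches the declared bucket count. A minimal HiveQL sketch of that setup, assuming the original loads of 000000_0 and 000001_0 stay as in the unchanged context of the test:

    CREATE TABLE bucket_big (key string, value string)
    PARTITIONED BY (ds string)
    CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS
    STORED AS TEXTFILE;

    -- one pre-sorted file per declared bucket: four loads for a four-bucket table
    LOAD DATA LOCAL INPATH '../../data/files/auto_sortmerge_join/big/000000_0'
      INTO TABLE bucket_big PARTITION (ds='2008-04-08');
    LOAD DATA LOCAL INPATH '../../data/files/auto_sortmerge_join/big/000001_0'
      INTO TABLE bucket_big PARTITION (ds='2008-04-08');
    LOAD DATA LOCAL INPATH '../../data/files/auto_sortmerge_join/big/000002_0'
      INTO TABLE bucket_big PARTITION (ds='2008-04-08');
    LOAD DATA LOCAL INPATH '../../data/files/auto_sortmerge_join/big/000003_0'
      INTO TABLE bucket_big PARTITION (ds='2008-04-08');

Keeping one file per bucket is what lets the optimizer keep selecting the Sorted Merge Bucket Map Join Operator shown in the plan above.
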
serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -174,7 +190,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -243,7 +259,7 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -38 +78 PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key @@ -258,16 +274,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -297,7 +313,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -305,7 +321,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -313,7 +329,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -321,7 +337,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -390,7 +406,7 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -38 +78 PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key @@ -534,16 +550,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: 
Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -574,7 +590,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -582,7 +598,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -590,7 +606,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -598,7 +614,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -755,7 +771,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -763,7 +779,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -771,7 +787,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -779,7 +795,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -800,16 +816,16 @@ STAGE PLANS: $hdt$_0:a TableScan alias: a - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 _col0 (type: string) @@ -861,7 +877,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -869,7 +885,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -877,7 +893,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} 
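
With four bucket files instead of two, the partition metadata for bucket_big scales accordingly throughout these plans (numFiles 2 -> 4, totalSize 2750 -> 5812, estimated Num rows 56 -> 120), and the golden result for the join moves from 38 to 78. The query itself is unchanged; a sketch of the check, assuming the four-file load above:

    -- expected result per the updated golden file: 78 (was 38 with two bucket files)
    SELECT count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key;
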
serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -885,7 +901,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -1034,16 +1050,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -1073,7 +1089,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -1081,7 +1097,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -1089,7 +1105,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -1097,7 +1113,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -1166,4 +1182,4 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -38 +78 diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_5.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_5.q.out index 45704d1253..451c3b3353 100644 --- a/ql/src/test/results/clientpositive/auto_sortmerge_join_5.q.out +++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_5.q.out @@ -1,8 +1,8 @@ -PREHOOK: query: CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_small -POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_small @@ -22,27 
+22,11 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/sm POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_small -PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000002_0' INTO TABLE bucket_small -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@bucket_small -POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000002_0' INTO TABLE bucket_small -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@bucket_small -PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000003_0' INTO TABLE bucket_small -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@bucket_small -POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000003_0' INTO TABLE bucket_small -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@bucket_small -PREHOOK: query: CREATE TABLE bucket_big (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE bucket_big (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_big -POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_big @@ -62,6 +46,22 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key @@ -76,16 +76,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE 
Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -114,7 +114,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -122,13 +122,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -136,7 +136,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -144,13 +144,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big @@ -216,16 +216,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -254,7 +254,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -262,13 +262,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -276,7 +276,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -284,13 +284,13 @@ STAGE PLANS: 
columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big @@ -369,16 +369,16 @@ STAGE PLANS: $hdt$_1:b TableScan alias: b - Statistics: Num rows: 1 Data size: 2260 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1140 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 2260 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1140 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 2260 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1140 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 _col0 (type: string) @@ -390,16 +390,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -429,7 +429,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -437,13 +437,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -451,7 +451,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -459,13 +459,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big @@ -475,7 +475,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat 
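
In auto_sortmerge_join_5.q (unpartitioned tables) the bucket counts swap in opposite directions: bucket_small drops from four buckets to two and sheds its 000002_0/000003_0 loads, while bucket_big grows from two buckets to four and gains them. The plans still select the Sorted Merge Bucket Map Join Operator, which remains legal because both tables stay sorted by key and the bucket counts stay integer multiples of each other (4 and 2). A sketch of the revised pair of definitions:

    CREATE TABLE bucket_small (key string, value string)
    CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;

    -- 4 is a multiple of 2: each bucket_small bucket pairs with two bucket_big buckets
    CREATE TABLE bucket_big (key string, value string)
    CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
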
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -492,7 +492,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -500,13 +500,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_small - numFiles 4 + numFiles 2 numRows 0 rawDataSize 0 serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_small @@ -551,16 +551,16 @@ STAGE PLANS: $hdt$_0:a TableScan alias: a - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 _col0 (type: string) @@ -572,16 +572,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 1 Data size: 2260 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1140 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 2260 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1140 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 2260 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1140 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -611,7 +611,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -619,13 +619,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -633,7 +633,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -641,13 +641,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name 
default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big @@ -657,7 +657,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -674,7 +674,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -682,13 +682,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_small - numFiles 4 + numFiles 2 numRows 0 rawDataSize 0 serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_small @@ -728,16 +728,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -766,7 +766,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -774,13 +774,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -788,7 +788,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -796,13 +796,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_7.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_7.q.out index 1959075912..f335142360 100644 --- a/ql/src/test/results/clientpositive/auto_sortmerge_join_7.q.out +++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_7.q.out @@ -72,11 +72,11 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/sm POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_small@ds=2008-04-09 -PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_big -POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_big @@ -97,6 +97,22 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big partition(ds='2008-04-09') PREHOOK: type: LOAD #### A masked pattern was here #### @@ -114,6 +130,22 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: 
default@bucket_big@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-09 PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key @@ -128,16 +160,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -167,7 +199,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -175,7 +207,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -183,7 +215,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -191,7 +223,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -216,7 +248,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -224,7 +256,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -232,7 +264,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -240,7 +272,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -312,7 +344,7 @@ POSTHOOK: Input: default@bucket_small 
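
auto_sortmerge_join_7.q applies the same change to both partitions: ds='2008-04-08' and ds='2008-04-09' of bucket_big each receive the extra 000002_0 and 000003_0 files, so every bucket_big partition entry in these plans reports bucket_count 4, numFiles 4 and totalSize 5812, and the join result that follows rises from 76 to 156. The per-partition properties can be inspected directly in the extended plan, for example:

    -- the Path -> Partition section of the extended plan should now list
    -- bucket_count 4, numFiles 4, totalSize 5812 for each bucket_big partition
    EXPLAIN EXTENDED
    SELECT count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key;
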
POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -76 +156 PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key @@ -327,16 +359,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -366,7 +398,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -374,7 +406,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -382,7 +414,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -390,7 +422,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -415,7 +447,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -423,7 +455,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -431,7 +463,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -439,7 +471,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -511,7 +543,7 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -76 +156 PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key @@ -655,16 
+687,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -695,7 +727,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -703,7 +735,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -711,7 +743,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -719,7 +751,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -744,7 +776,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -752,7 +784,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -760,7 +792,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -768,7 +800,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -926,7 +958,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -934,7 +966,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -942,7 +974,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -950,7 +982,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 
2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -974,7 +1006,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -982,7 +1014,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -990,7 +1022,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -998,7 +1030,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -1019,16 +1051,16 @@ STAGE PLANS: $hdt$_0:a TableScan alias: a - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 _col0 (type: string) @@ -1080,7 +1112,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -1088,7 +1120,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -1096,7 +1128,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -1104,7 +1136,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -1129,7 +1161,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -1137,7 +1169,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -1145,7 +1177,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -1153,7 +1185,7 @@ STAGE PLANS: output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -1302,16 +1334,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -1341,7 +1373,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -1349,7 +1381,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -1357,7 +1389,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -1365,7 +1397,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -1390,7 +1422,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -1398,7 +1430,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -1406,7 +1438,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -1414,7 +1446,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -1486,4 +1518,4 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -76 +156 diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_2.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_2.q.out index 054b0d00be..d4472cf2a0 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_2.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_2.q.out @@ -1,8 +1,8 @@ -PREHOOK: query: CREATE TABLE bucket_small (key string, 
value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_small -POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_small @@ -23,27 +23,11 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/sm POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_small@ds=2008-04-08 -PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000002_0' INTO TABLE bucket_small partition(ds='2008-04-08') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@bucket_small@ds=2008-04-08 -POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000002_0' INTO TABLE bucket_small partition(ds='2008-04-08') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@bucket_small@ds=2008-04-08 -PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000003_0' INTO TABLE bucket_small partition(ds='2008-04-08') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@bucket_small@ds=2008-04-08 -POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000003_0' INTO TABLE bucket_small partition(ds='2008-04-08') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@bucket_small@ds=2008-04-08 -PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_big -POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_big @@ -64,6 +48,22 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern 
was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big partition(ds='2008-04-09') PREHOOK: type: LOAD #### A masked pattern was here #### @@ -81,6 +81,22 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-09 PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key @@ -101,16 +117,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 4 Data size: 2996 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 1508 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 4 Data size: 2996 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 1508 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 2996 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 1508 Basic stats: COMPLETE Column stats: NONE Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -121,7 +137,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -129,7 +145,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_small - numFiles 4 + numFiles 2 numRows 0 partition_columns ds partition_columns.types string @@ -137,7 +153,7 @@ STAGE PLANS: serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -145,7 +161,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -167,16 +183,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 112 Data size: 74872 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 158376 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 107 Data size: 71529 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 228 Data size: 150457 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 107 Data size: 71529 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 228 Data size: 150457 Basic stats: COMPLETE Column stats: NONE Merge Join Operator condition map: Inner Join 0 to 1 @@ -184,7 +200,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 0 - Statistics: Num rows: 117 Data size: 78681 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 165502 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -208,7 +224,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -216,7 +232,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -224,7 +240,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -232,7 +248,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -256,7 +272,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -264,7 +280,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -272,7 +288,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -280,7 +296,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -375,16 +391,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 4 
Data size: 2996 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 1508 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 4 Data size: 2996 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 1508 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 2996 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 1508 Basic stats: COMPLETE Column stats: NONE Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -395,7 +411,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -403,7 +419,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_small - numFiles 4 + numFiles 2 numRows 0 partition_columns ds partition_columns.types string @@ -411,7 +427,7 @@ STAGE PLANS: serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -419,7 +435,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -441,16 +457,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 112 Data size: 74872 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 158376 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 107 Data size: 71529 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 228 Data size: 150457 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 107 Data size: 71529 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 228 Data size: 150457 Basic stats: COMPLETE Column stats: NONE Merge Join Operator condition map: Inner Join 0 to 1 @@ -458,7 +474,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 0 - Statistics: Num rows: 117 Data size: 78681 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 165502 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -482,7 +498,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -490,7 +506,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -498,7 +514,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -506,7 +522,7 @@ STAGE PLANS: output 
format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -530,7 +546,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -538,7 +554,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -546,7 +562,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -554,7 +570,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_4.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_4.q.out index 95d329862c..5cd5d798bc 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_4.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_4.q.out @@ -72,11 +72,11 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/sm POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_small@ds=2008-04-09 -PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_big -POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_big @@ -97,6 +97,22 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath 
'../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key @@ -232,16 +248,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 56 Data size: 37620 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 79280 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 54 Data size: 36276 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 114 Data size: 75316 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 54 Data size: 36276 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 114 Data size: 75316 Basic stats: COMPLETE Column stats: NONE Merge Join Operator condition map: Inner Join 0 to 1 @@ -249,7 +265,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 1 - Statistics: Num rows: 59 Data size: 39903 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 125 Data size: 82847 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -273,7 +289,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -281,7 +297,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -289,7 +305,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -297,7 +313,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -370,7 +386,7 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -38 +78 PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key @@ -506,16 +522,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 56 Data size: 37620 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 79280 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 54 Data size: 36276 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 114 Data size: 75316 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - 
Statistics: Num rows: 54 Data size: 36276 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 114 Data size: 75316 Basic stats: COMPLETE Column stats: NONE Merge Join Operator condition map: Inner Join 0 to 1 @@ -523,7 +539,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 0 - Statistics: Num rows: 59 Data size: 39903 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 125 Data size: 82847 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -547,7 +563,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -555,7 +571,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -563,7 +579,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -571,7 +587,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -644,7 +660,7 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -38 +78 PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key @@ -780,16 +796,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 56 Data size: 37620 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 79280 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 54 Data size: 36276 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 114 Data size: 75316 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 54 Data size: 36276 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 114 Data size: 75316 Basic stats: COMPLETE Column stats: NONE Merge Join Operator condition map: Inner Join 0 to 1 @@ -797,7 +813,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 0 - Statistics: Num rows: 59 Data size: 39903 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 125 Data size: 82847 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -821,7 +837,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -829,7 +845,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -837,7 +853,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} 
serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -845,7 +861,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -918,4 +934,4 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -38 +78 diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_5.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_5.q.out index e711715aa5..a18f4b21fc 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_5.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_5.q.out @@ -1,8 +1,8 @@ -PREHOOK: query: CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_small -POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_small @@ -22,27 +22,11 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/sm POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_small -PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000002_0' INTO TABLE bucket_small -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@bucket_small -POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000002_0' INTO TABLE bucket_small -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@bucket_small -PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000003_0' INTO TABLE bucket_small -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@bucket_small -POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000003_0' INTO TABLE bucket_small -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@bucket_small -PREHOOK: query: CREATE TABLE bucket_big (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE bucket_big (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_big -POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default 
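Taken together, the auto_sortmerge_join_5.q.out edits reduce to the following HiveQL setup. The DDL and fixture paths are the test's own; gathering them into one script like this is only a sketch of what the .q file presumably runs:

  CREATE TABLE bucket_small (key string, value string)
    CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
  -- the small side keeps only the first two pre-bucketed fixture files
  LOAD DATA LOCAL INPATH '../../data/files/auto_sortmerge_join/small/000000_0' INTO TABLE bucket_small;
  LOAD DATA LOCAL INPATH '../../data/files/auto_sortmerge_join/small/000001_0' INTO TABLE bucket_small;

  CREATE TABLE bucket_big (key string, value string)
    CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
  -- the big side now loads all four fixture files, one per declared bucket
  LOAD DATA LOCAL INPATH '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big;
  LOAD DATA LOCAL INPATH '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_big;
  LOAD DATA LOCAL INPATH '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big;
  LOAD DATA LOCAL INPATH '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big;

The same swap (small: 4 buckets/4 files down to 2/2, big: 2 buckets/2 files up to 4/4) repeats across the partitioned variants above, which is why every bucket_count, numFiles, and totalSize flip in the plan dumps tracks it.
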
POSTHOOK: Output: default@bucket_big @@ -62,6 +46,22 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key @@ -101,7 +101,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -109,13 +109,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_small - numFiles 4 + numFiles 2 numRows 0 rawDataSize 0 serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -123,7 +123,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -131,13 +131,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_small - numFiles 4 + numFiles 2 numRows 0 rawDataSize 0 serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_small @@ -187,7 +187,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -195,13 +195,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -209,7 +209,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: 
SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -217,13 +217,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big @@ -318,7 +318,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -326,13 +326,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_small - numFiles 4 + numFiles 2 numRows 0 rawDataSize 0 serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -340,7 +340,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -348,13 +348,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_small - numFiles 4 + numFiles 2 numRows 0 rawDataSize 0 serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_small @@ -404,7 +404,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -412,13 +412,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -426,7 +426,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -434,13 +434,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big @@ -562,7 +562,7 @@ STAGE PLANS: output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -570,13 +570,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -584,7 +584,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -592,13 +592,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big @@ -639,7 +639,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -647,13 +647,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_small - numFiles 4 + numFiles 2 numRows 0 rawDataSize 0 serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -661,7 +661,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -669,13 +669,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_small - numFiles 4 + numFiles 2 numRows 0 rawDataSize 0 serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_small diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_7.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_7.q.out index 53c685cb11..fdea211fa4 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_7.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_7.q.out @@ -72,11 +72,11 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/sm POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_small@ds=2008-04-09 -PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE 
bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_big -POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_big @@ -97,6 +97,22 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big partition(ds='2008-04-09') PREHOOK: type: LOAD #### A masked pattern was here #### @@ -114,6 +130,22 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-09 PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key @@ -249,16 +281,16 @@ STAGE PLANS: 
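Each Path -> Partition entry in these dumps mirrors the partition's metastore properties, so the bucket_count/numFiles/totalSize flips can also be checked outside the golden files. A hypothetical spot-check (not part of the .q scripts), assuming the tables created above:

  -- expect numFiles=4 and totalSize=5812 after loading all four big fixtures
  DESCRIBE FORMATTED bucket_big PARTITION (ds='2008-04-08');
  -- reproduces the Path -> Partition and Statistics sections shown in these hunks
  EXPLAIN EXTENDED SELECT count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key;
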
Map Operator Tree: TableScan alias: b - Statistics: Num rows: 112 Data size: 74872 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 158376 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 107 Data size: 71529 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 228 Data size: 150457 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 107 Data size: 71529 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 228 Data size: 150457 Basic stats: COMPLETE Column stats: NONE Merge Join Operator condition map: Inner Join 0 to 1 @@ -266,7 +298,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 1 - Statistics: Num rows: 117 Data size: 78681 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 165502 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -290,7 +322,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -298,7 +330,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -306,7 +338,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -314,7 +346,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -338,7 +370,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -346,7 +378,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -354,7 +386,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -362,7 +394,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -438,7 +470,7 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -76 +156 PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key @@ -574,16 +606,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 112 Data size: 74872 
Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 158376 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 107 Data size: 71529 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 228 Data size: 150457 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 107 Data size: 71529 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 228 Data size: 150457 Basic stats: COMPLETE Column stats: NONE Merge Join Operator condition map: Inner Join 0 to 1 @@ -591,7 +623,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 0 - Statistics: Num rows: 117 Data size: 78681 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 165502 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -615,7 +647,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -623,7 +655,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -631,7 +663,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -639,7 +671,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -663,7 +695,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -671,7 +703,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -679,7 +711,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -687,7 +719,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -763,7 +795,7 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -76 +156 PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key @@ -899,16 +931,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 112 Data size: 74872 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 
158376 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 107 Data size: 71529 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 228 Data size: 150457 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 107 Data size: 71529 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 228 Data size: 150457 Basic stats: COMPLETE Column stats: NONE Merge Join Operator condition map: Inner Join 0 to 1 @@ -916,7 +948,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 0 - Statistics: Num rows: 117 Data size: 78681 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 165502 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -940,7 +972,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -948,7 +980,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -956,7 +988,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -964,7 +996,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -988,7 +1020,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -996,7 +1028,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -1004,7 +1036,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -1012,7 +1044,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -1088,4 +1120,4 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -76 +156 diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_2.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_2.q.out index 8cfa113794..117ff4aecc 100644 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_2.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_2.q.out @@ -1,8 +1,8 @@ -PREHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS 
STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_small -POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_small @@ -23,27 +23,11 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/sm POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_small@ds=2008-04-08 -PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000002_0' INTO TABLE bucket_small partition(ds='2008-04-08') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@bucket_small@ds=2008-04-08 -POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000002_0' INTO TABLE bucket_small partition(ds='2008-04-08') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@bucket_small@ds=2008-04-08 -PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000003_0' INTO TABLE bucket_small partition(ds='2008-04-08') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@bucket_small@ds=2008-04-08 -POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000003_0' INTO TABLE bucket_small partition(ds='2008-04-08') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@bucket_small@ds=2008-04-08 -PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_big -POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_big @@ -64,6 +48,22 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data 
local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big partition(ds='2008-04-09') PREHOOK: type: LOAD #### A masked pattern was here #### @@ -81,6 +81,22 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-09 PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key @@ -100,16 +116,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -117,7 +133,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 0 - Statistics: Num rows: 123 Data size: 60500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 264 Data size: 127864 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Group By Operator aggregations: count() @@ -142,7 +158,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -150,7 +166,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was 
here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -158,7 +174,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -166,7 +182,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -191,7 +207,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -199,7 +215,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -207,7 +223,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -215,7 +231,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -308,16 +324,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -325,7 +341,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 0 - Statistics: Num rows: 123 Data size: 60500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 264 Data size: 127864 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Group By Operator aggregations: count() @@ -350,7 +366,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -358,7 +374,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -366,7 +382,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -374,7 +390,7 @@ STAGE PLANS: output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -399,7 +415,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -407,7 +423,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -415,7 +431,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -423,7 +439,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_4.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_4.q.out index fce5e0cfc4..aff5a0d242 100644 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_4.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_4.q.out @@ -72,11 +72,11 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/sm POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_small@ds=2008-04-09 -PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_big -POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_big @@ -97,6 +97,22 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath 
'../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key @@ -116,16 +132,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -133,7 +149,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 1 - Statistics: Num rows: 61 Data size: 30250 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 132 Data size: 63932 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Group By Operator aggregations: count() @@ -158,7 +174,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -166,7 +182,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -174,7 +190,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -182,7 +198,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -254,7 +270,7 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -38 +78 PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key @@ -274,16 +290,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) 
outputColumnNames: _col0 - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -291,7 +307,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 0 - Statistics: Num rows: 61 Data size: 30250 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 132 Data size: 63932 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Group By Operator aggregations: count() @@ -316,7 +332,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -324,7 +340,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -332,7 +348,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -340,7 +356,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -412,7 +428,7 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -38 +78 PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key @@ -432,16 +448,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -449,7 +465,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 0 - Statistics: Num rows: 61 Data size: 30250 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 132 Data size: 63932 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Group By Operator aggregations: count() @@ -474,7 +490,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -482,7 +498,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -490,7 +506,7 @@ 
STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -498,7 +514,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -570,4 +586,4 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -38 +78 diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_5.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_5.q.out index 8250eca099..6255dd2819 100644 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_5.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_5.q.out @@ -1,8 +1,8 @@ -PREHOOK: query: CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_small -POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_small @@ -22,27 +22,11 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/sm POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_small -PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000002_0' INTO TABLE bucket_small -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@bucket_small -POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000002_0' INTO TABLE bucket_small -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@bucket_small -PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000003_0' INTO TABLE bucket_small -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@bucket_small -POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000003_0' INTO TABLE bucket_small -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@bucket_small -PREHOOK: query: CREATE TABLE bucket_big (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE bucket_big (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_big -POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS 
STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_big @@ -62,6 +46,22 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key @@ -81,16 +81,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -98,7 +98,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 1 - Statistics: Num rows: 1 Data size: 2486 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1254 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Group By Operator aggregations: count() @@ -122,7 +122,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -130,13 +130,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -144,7 +144,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -152,13 +152,13 @@ STAGE PLANS: columns.types string:string #### A masked 
pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big @@ -232,16 +232,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -249,7 +249,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 0 - Statistics: Num rows: 1 Data size: 30250 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 63932 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Group By Operator aggregations: count() @@ -273,7 +273,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -281,13 +281,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -295,7 +295,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -303,13 +303,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big @@ -382,16 +382,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 1 Data size: 2260 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1140 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 2260 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1140 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: 
_col0 - Statistics: Num rows: 1 Data size: 2260 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1140 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: string) @@ -414,7 +414,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -422,13 +422,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_small - numFiles 4 + numFiles 2 numRows 0 rawDataSize 0 serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -436,7 +436,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -444,13 +444,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_small - numFiles 4 + numFiles 2 numRows 0 rawDataSize 0 serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_small @@ -468,16 +468,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -487,7 +487,7 @@ STAGE PLANS: input vertices: 1 Map 3 Position of Big Table: 0 - Statistics: Num rows: 1 Data size: 30250 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 63932 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Group By Operator aggregations: count() @@ -518,7 +518,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -526,13 +526,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -540,7 +540,7 @@ STAGE PLANS: output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -548,13 +548,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_7.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_7.q.out index eb813c1734..ac5cd47fbb 100644 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_7.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_7.q.out @@ -72,11 +72,11 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/sm POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_small@ds=2008-04-09 -PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_big -POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_big @@ -97,6 +97,22 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big partition(ds='2008-04-09') PREHOOK: type: LOAD #### A masked pattern was here #### @@ -114,6 +130,22 @@ POSTHOOK: query: load data local inpath 
'../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-09 PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key @@ -133,16 +165,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -150,7 +182,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 1 - Statistics: Num rows: 123 Data size: 60500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 264 Data size: 127864 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Group By Operator aggregations: count() @@ -175,7 +207,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -183,7 +215,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -191,7 +223,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -199,7 +231,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -224,7 +256,7 @@ STAGE PLANS: partition values: 
ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -232,7 +264,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -240,7 +272,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -248,7 +280,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -323,7 +355,7 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -76 +156 PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key @@ -343,16 +375,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -360,7 +392,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 0 - Statistics: Num rows: 123 Data size: 60500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 264 Data size: 127864 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Group By Operator aggregations: count() @@ -385,7 +417,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -393,7 +425,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -401,7 +433,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -409,7 +441,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -434,7 +466,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + 
bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -442,7 +474,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -450,7 +482,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -458,7 +490,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -533,7 +565,7 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -76 +156 PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key @@ -553,16 +585,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -570,7 +602,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 0 - Statistics: Num rows: 123 Data size: 60500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 264 Data size: 127864 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Group By Operator aggregations: count() @@ -595,7 +627,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -603,7 +635,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -611,7 +643,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -619,7 +651,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -644,7 +676,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key 
column.name.delimiter , columns key,value @@ -652,7 +684,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 2 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string @@ -660,7 +692,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -668,7 +700,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -743,4 +775,4 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -76 +156 diff --git a/standalone-metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp b/standalone-metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp index 27f8c0f2fc..1cb5900da1 100644 --- a/standalone-metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp +++ b/standalone-metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp @@ -4925,6 +4925,14 @@ void Table::__set_tableType(const std::string& val) { this->tableType = val; } +void Table::__set_bucketingVersion(const int32_t val) { + this->bucketingVersion = val; +} + +void Table::__set_expertMode(const bool val) { + this->expertMode = val; +} + void Table::__set_privileges(const PrincipalPrivilegeSet& val) { this->privileges = val; __isset.privileges = true; @@ -5090,6 +5098,22 @@ uint32_t Table::read(::apache::thrift::protocol::TProtocol* iprot) { } break; case 13: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->bucketingVersion); + this->__isset.bucketingVersion = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 14: + if (ftype == ::apache::thrift::protocol::T_BOOL) { + xfer += iprot->readBool(this->expertMode); + this->__isset.expertMode = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 15: if (ftype == ::apache::thrift::protocol::T_STRUCT) { xfer += this->privileges.read(iprot); this->__isset.privileges = true; @@ -5097,7 +5121,7 @@ uint32_t Table::read(::apache::thrift::protocol::TProtocol* iprot) { xfer += iprot->skip(ftype); } break; - case 14: + case 16: if (ftype == ::apache::thrift::protocol::T_BOOL) { xfer += iprot->readBool(this->temporary); this->__isset.temporary = true; @@ -5105,7 +5129,7 @@ uint32_t Table::read(::apache::thrift::protocol::TProtocol* iprot) { xfer += iprot->skip(ftype); } break; - case 15: + case 17: if (ftype == ::apache::thrift::protocol::T_BOOL) { xfer += iprot->readBool(this->rewriteEnabled); this->__isset.rewriteEnabled = true; @@ -5113,7 +5137,7 @@ uint32_t Table::read(::apache::thrift::protocol::TProtocol* iprot) { xfer += iprot->skip(ftype); } break; - case 16: + case 18: if (ftype == ::apache::thrift::protocol::T_MAP) { { this->creationMetadata.clear(); @@ -5218,23 +5242,31 @@ uint32_t Table::write(::apache::thrift::protocol::TProtocol* oprot) const { xfer += oprot->writeString(this->tableType); xfer += oprot->writeFieldEnd(); + xfer += oprot->writeFieldBegin("bucketingVersion", ::apache::thrift::protocol::T_I32, 13); + xfer += oprot->writeI32(this->bucketingVersion); + xfer += oprot->writeFieldEnd(); + + xfer += 
oprot->writeFieldBegin("expertMode", ::apache::thrift::protocol::T_BOOL, 14); + xfer += oprot->writeBool(this->expertMode); + xfer += oprot->writeFieldEnd(); + if (this->__isset.privileges) { - xfer += oprot->writeFieldBegin("privileges", ::apache::thrift::protocol::T_STRUCT, 13); + xfer += oprot->writeFieldBegin("privileges", ::apache::thrift::protocol::T_STRUCT, 15); xfer += this->privileges.write(oprot); xfer += oprot->writeFieldEnd(); } if (this->__isset.temporary) { - xfer += oprot->writeFieldBegin("temporary", ::apache::thrift::protocol::T_BOOL, 14); + xfer += oprot->writeFieldBegin("temporary", ::apache::thrift::protocol::T_BOOL, 16); xfer += oprot->writeBool(this->temporary); xfer += oprot->writeFieldEnd(); } if (this->__isset.rewriteEnabled) { - xfer += oprot->writeFieldBegin("rewriteEnabled", ::apache::thrift::protocol::T_BOOL, 15); + xfer += oprot->writeFieldBegin("rewriteEnabled", ::apache::thrift::protocol::T_BOOL, 17); xfer += oprot->writeBool(this->rewriteEnabled); xfer += oprot->writeFieldEnd(); } if (this->__isset.creationMetadata) { - xfer += oprot->writeFieldBegin("creationMetadata", ::apache::thrift::protocol::T_MAP, 16); + xfer += oprot->writeFieldBegin("creationMetadata", ::apache::thrift::protocol::T_MAP, 18); { xfer += oprot->writeMapBegin(::apache::thrift::protocol::T_STRING, ::apache::thrift::protocol::T_STRUCT, static_cast(this->creationMetadata.size())); std::map ::const_iterator _iter232; @@ -5266,6 +5298,8 @@ void swap(Table &a, Table &b) { swap(a.viewOriginalText, b.viewOriginalText); swap(a.viewExpandedText, b.viewExpandedText); swap(a.tableType, b.tableType); + swap(a.bucketingVersion, b.bucketingVersion); + swap(a.expertMode, b.expertMode); swap(a.privileges, b.privileges); swap(a.temporary, b.temporary); swap(a.rewriteEnabled, b.rewriteEnabled); @@ -5286,6 +5320,8 @@ Table::Table(const Table& other233) { viewOriginalText = other233.viewOriginalText; viewExpandedText = other233.viewExpandedText; tableType = other233.tableType; + bucketingVersion = other233.bucketingVersion; + expertMode = other233.expertMode; privileges = other233.privileges; temporary = other233.temporary; rewriteEnabled = other233.rewriteEnabled; @@ -5305,6 +5341,8 @@ Table& Table::operator=(const Table& other234) { viewOriginalText = other234.viewOriginalText; viewExpandedText = other234.viewExpandedText; tableType = other234.tableType; + bucketingVersion = other234.bucketingVersion; + expertMode = other234.expertMode; privileges = other234.privileges; temporary = other234.temporary; rewriteEnabled = other234.rewriteEnabled; @@ -5327,6 +5365,8 @@ void Table::printTo(std::ostream& out) const { out << ", " << "viewOriginalText=" << to_string(viewOriginalText); out << ", " << "viewExpandedText=" << to_string(viewExpandedText); out << ", " << "tableType=" << to_string(tableType); + out << ", " << "bucketingVersion=" << to_string(bucketingVersion); + out << ", " << "expertMode=" << to_string(expertMode); out << ", " << "privileges="; (__isset.privileges ? (out << to_string(privileges)) : (out << "")); out << ", " << "temporary="; (__isset.temporary ? (out << to_string(temporary)) : (out << "")); out << ", " << "rewriteEnabled="; (__isset.rewriteEnabled ? 
(out << to_string(rewriteEnabled)) : (out << "")); diff --git a/standalone-metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h b/standalone-metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h index df646a7d17..8c828a9f0f 100644 --- a/standalone-metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h +++ b/standalone-metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h @@ -2372,7 +2372,7 @@ inline std::ostream& operator<<(std::ostream& out, const StorageDescriptor& obj) } typedef struct _Table__isset { - _Table__isset() : tableName(false), dbName(false), owner(false), createTime(false), lastAccessTime(false), retention(false), sd(false), partitionKeys(false), parameters(false), viewOriginalText(false), viewExpandedText(false), tableType(false), privileges(false), temporary(true), rewriteEnabled(false), creationMetadata(false) {} + _Table__isset() : tableName(false), dbName(false), owner(false), createTime(false), lastAccessTime(false), retention(false), sd(false), partitionKeys(false), parameters(false), viewOriginalText(false), viewExpandedText(false), tableType(false), bucketingVersion(false), expertMode(false), privileges(false), temporary(true), rewriteEnabled(false), creationMetadata(false) {} bool tableName :1; bool dbName :1; bool owner :1; @@ -2385,6 +2385,8 @@ typedef struct _Table__isset { bool viewOriginalText :1; bool viewExpandedText :1; bool tableType :1; + bool bucketingVersion :1; + bool expertMode :1; bool privileges :1; bool temporary :1; bool rewriteEnabled :1; @@ -2396,7 +2398,7 @@ class Table { Table(const Table&); Table& operator=(const Table&); - Table() : tableName(), dbName(), owner(), createTime(0), lastAccessTime(0), retention(0), viewOriginalText(), viewExpandedText(), tableType(), temporary(false), rewriteEnabled(0) { + Table() : tableName(), dbName(), owner(), createTime(0), lastAccessTime(0), retention(0), viewOriginalText(), viewExpandedText(), tableType(), bucketingVersion(0), expertMode(0), temporary(false), rewriteEnabled(0) { } virtual ~Table() throw(); @@ -2412,6 +2414,8 @@ class Table { std::string viewOriginalText; std::string viewExpandedText; std::string tableType; + int32_t bucketingVersion; + bool expertMode; PrincipalPrivilegeSet privileges; bool temporary; bool rewriteEnabled; @@ -2443,6 +2447,10 @@ class Table { void __set_tableType(const std::string& val); + void __set_bucketingVersion(const int32_t val); + + void __set_expertMode(const bool val); + void __set_privileges(const PrincipalPrivilegeSet& val); void __set_temporary(const bool val); @@ -2477,6 +2485,10 @@ class Table { return false; if (!(tableType == rhs.tableType)) return false; + if (!(bucketingVersion == rhs.bucketingVersion)) + return false; + if (!(expertMode == rhs.expertMode)) + return false; if (__isset.privileges != rhs.__isset.privileges) return false; else if (__isset.privileges && !(privileges == rhs.privileges)) diff --git a/standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/Table.java b/standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/Table.java index f317b0393f..70d35212b0 100644 --- a/standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/Table.java +++ b/standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/Table.java @@ -50,10 +50,12 @@ private static final org.apache.thrift.protocol.TField VIEW_ORIGINAL_TEXT_FIELD_DESC = new org.apache.thrift.protocol.TField("viewOriginalText", org.apache.thrift.protocol.TType.STRING, 
(short)10);
   private static final org.apache.thrift.protocol.TField VIEW_EXPANDED_TEXT_FIELD_DESC = new org.apache.thrift.protocol.TField("viewExpandedText", org.apache.thrift.protocol.TType.STRING, (short)11);
   private static final org.apache.thrift.protocol.TField TABLE_TYPE_FIELD_DESC = new org.apache.thrift.protocol.TField("tableType", org.apache.thrift.protocol.TType.STRING, (short)12);
-  private static final org.apache.thrift.protocol.TField PRIVILEGES_FIELD_DESC = new org.apache.thrift.protocol.TField("privileges", org.apache.thrift.protocol.TType.STRUCT, (short)13);
-  private static final org.apache.thrift.protocol.TField TEMPORARY_FIELD_DESC = new org.apache.thrift.protocol.TField("temporary", org.apache.thrift.protocol.TType.BOOL, (short)14);
-  private static final org.apache.thrift.protocol.TField REWRITE_ENABLED_FIELD_DESC = new org.apache.thrift.protocol.TField("rewriteEnabled", org.apache.thrift.protocol.TType.BOOL, (short)15);
-  private static final org.apache.thrift.protocol.TField CREATION_METADATA_FIELD_DESC = new org.apache.thrift.protocol.TField("creationMetadata", org.apache.thrift.protocol.TType.MAP, (short)16);
+  private static final org.apache.thrift.protocol.TField BUCKETING_VERSION_FIELD_DESC = new org.apache.thrift.protocol.TField("bucketingVersion", org.apache.thrift.protocol.TType.I32, (short)13);
+  private static final org.apache.thrift.protocol.TField EXPERT_MODE_FIELD_DESC = new org.apache.thrift.protocol.TField("expertMode", org.apache.thrift.protocol.TType.BOOL, (short)14);
+  private static final org.apache.thrift.protocol.TField PRIVILEGES_FIELD_DESC = new org.apache.thrift.protocol.TField("privileges", org.apache.thrift.protocol.TType.STRUCT, (short)15);
+  private static final org.apache.thrift.protocol.TField TEMPORARY_FIELD_DESC = new org.apache.thrift.protocol.TField("temporary", org.apache.thrift.protocol.TType.BOOL, (short)16);
+  private static final org.apache.thrift.protocol.TField REWRITE_ENABLED_FIELD_DESC = new org.apache.thrift.protocol.TField("rewriteEnabled", org.apache.thrift.protocol.TType.BOOL, (short)17);
+  private static final org.apache.thrift.protocol.TField CREATION_METADATA_FIELD_DESC = new org.apache.thrift.protocol.TField("creationMetadata", org.apache.thrift.protocol.TType.MAP, (short)18);
   private static final Map<Class<? extends IScheme>, SchemeFactory> schemes = new HashMap<Class<? extends IScheme>, SchemeFactory>();
   static {
@@ -73,6 +75,8 @@
   private String viewOriginalText; // required
   private String viewExpandedText; // required
   private String tableType; // required
+  private int bucketingVersion; // required
+  private boolean expertMode; // required
   private PrincipalPrivilegeSet privileges; // optional
   private boolean temporary; // optional
   private boolean rewriteEnabled; // optional
@@ -92,10 +96,12 @@
     VIEW_ORIGINAL_TEXT((short)10, "viewOriginalText"),
     VIEW_EXPANDED_TEXT((short)11, "viewExpandedText"),
     TABLE_TYPE((short)12, "tableType"),
-    PRIVILEGES((short)13, "privileges"),
-    TEMPORARY((short)14, "temporary"),
-    REWRITE_ENABLED((short)15, "rewriteEnabled"),
-    CREATION_METADATA((short)16, "creationMetadata");
+    BUCKETING_VERSION((short)13, "bucketingVersion"),
+    EXPERT_MODE((short)14, "expertMode"),
+    PRIVILEGES((short)15, "privileges"),
+    TEMPORARY((short)16, "temporary"),
+    REWRITE_ENABLED((short)17, "rewriteEnabled"),
+    CREATION_METADATA((short)18, "creationMetadata");
     private static final Map<String, _Fields> byName = new HashMap<String, _Fields>();
@@ -134,13 +140,17 @@ public static _Fields findByThriftId(int fieldId) {
           return VIEW_EXPANDED_TEXT;
         case 12: // TABLE_TYPE
           return TABLE_TYPE;
-        case 13: // PRIVILEGES
+        case 13: // BUCKETING_VERSION
+          return BUCKETING_VERSION;
+        case 14: // EXPERT_MODE
+          return EXPERT_MODE;
+        case 15: // PRIVILEGES
           return PRIVILEGES;
-        case 14: // TEMPORARY
+        case 16: // TEMPORARY
           return TEMPORARY;
-        case 15: // REWRITE_ENABLED
+        case 17: // REWRITE_ENABLED
           return REWRITE_ENABLED;
-        case 16: // CREATION_METADATA
+        case 18: // CREATION_METADATA
           return CREATION_METADATA;
         default:
           return null;
@@ -185,8 +195,10 @@ public String getFieldName() {
   private static final int __CREATETIME_ISSET_ID = 0;
   private static final int __LASTACCESSTIME_ISSET_ID = 1;
   private static final int __RETENTION_ISSET_ID = 2;
-  private static final int __TEMPORARY_ISSET_ID = 3;
-  private static final int __REWRITEENABLED_ISSET_ID = 4;
+  private static final int __BUCKETINGVERSION_ISSET_ID = 3;
+  private static final int __EXPERTMODE_ISSET_ID = 4;
+  private static final int __TEMPORARY_ISSET_ID = 5;
+  private static final int __REWRITEENABLED_ISSET_ID = 6;
   private byte __isset_bitfield = 0;
   private static final _Fields optionals[] = {_Fields.PRIVILEGES,_Fields.TEMPORARY,_Fields.REWRITE_ENABLED,_Fields.CREATION_METADATA};
   public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap;
@@ -219,6 +231,10 @@ public String getFieldName() {
         new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING)));
     tmpMap.put(_Fields.TABLE_TYPE, new org.apache.thrift.meta_data.FieldMetaData("tableType", org.apache.thrift.TFieldRequirementType.DEFAULT,
         new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING)));
+    tmpMap.put(_Fields.BUCKETING_VERSION, new org.apache.thrift.meta_data.FieldMetaData("bucketingVersion", org.apache.thrift.TFieldRequirementType.DEFAULT,
+        new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32)));
+    tmpMap.put(_Fields.EXPERT_MODE, new org.apache.thrift.meta_data.FieldMetaData("expertMode", org.apache.thrift.TFieldRequirementType.DEFAULT,
+        new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.BOOL)));
     tmpMap.put(_Fields.PRIVILEGES, new org.apache.thrift.meta_data.FieldMetaData("privileges", org.apache.thrift.TFieldRequirementType.OPTIONAL,
         new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, PrincipalPrivilegeSet.class)));
     tmpMap.put(_Fields.TEMPORARY, new org.apache.thrift.meta_data.FieldMetaData("temporary", org.apache.thrift.TFieldRequirementType.OPTIONAL,
@@ -250,7 +266,9 @@ public Table(
     Map<String,String> parameters,
     String viewOriginalText,
     String viewExpandedText,
-    String tableType)
+    String tableType,
+    int bucketingVersion,
+    boolean expertMode)
   {
     this();
     this.tableName = tableName;
@@ -268,6 +286,10 @@ public Table(
     this.viewOriginalText = viewOriginalText;
     this.viewExpandedText = viewExpandedText;
     this.tableType = tableType;
+    this.bucketingVersion = bucketingVersion;
+    setBucketingVersionIsSet(true);
+    this.expertMode = expertMode;
+    setExpertModeIsSet(true);
   }
 
   /**
@@ -310,6 +332,8 @@ public Table(Table other) {
     if (other.isSetTableType()) {
       this.tableType = other.tableType;
     }
+    this.bucketingVersion = other.bucketingVersion;
+    this.expertMode = other.expertMode;
     if (other.isSetPrivileges()) {
       this.privileges = new PrincipalPrivilegeSet(other.privileges);
     }
@@ -353,6 +377,10 @@ public void clear() {
     this.viewOriginalText = null;
     this.viewExpandedText = null;
     this.tableType = null;
+    setBucketingVersionIsSet(false);
+    this.bucketingVersion = 0;
+    setExpertModeIsSet(false);
+    this.expertMode = 
false; this.privileges = null; this.temporary = false; @@ -660,6 +688,50 @@ public void setTableTypeIsSet(boolean value) { } } + public int getBucketingVersion() { + return this.bucketingVersion; + } + + public void setBucketingVersion(int bucketingVersion) { + this.bucketingVersion = bucketingVersion; + setBucketingVersionIsSet(true); + } + + public void unsetBucketingVersion() { + __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __BUCKETINGVERSION_ISSET_ID); + } + + /** Returns true if field bucketingVersion is set (has been assigned a value) and false otherwise */ + public boolean isSetBucketingVersion() { + return EncodingUtils.testBit(__isset_bitfield, __BUCKETINGVERSION_ISSET_ID); + } + + public void setBucketingVersionIsSet(boolean value) { + __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __BUCKETINGVERSION_ISSET_ID, value); + } + + public boolean isExpertMode() { + return this.expertMode; + } + + public void setExpertMode(boolean expertMode) { + this.expertMode = expertMode; + setExpertModeIsSet(true); + } + + public void unsetExpertMode() { + __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __EXPERTMODE_ISSET_ID); + } + + /** Returns true if field expertMode is set (has been assigned a value) and false otherwise */ + public boolean isSetExpertMode() { + return EncodingUtils.testBit(__isset_bitfield, __EXPERTMODE_ISSET_ID); + } + + public void setExpertModeIsSet(boolean value) { + __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __EXPERTMODE_ISSET_ID, value); + } + public PrincipalPrivilegeSet getPrivileges() { return this.privileges; } @@ -859,6 +931,22 @@ public void setFieldValue(_Fields field, Object value) { } break; + case BUCKETING_VERSION: + if (value == null) { + unsetBucketingVersion(); + } else { + setBucketingVersion((Integer)value); + } + break; + + case EXPERT_MODE: + if (value == null) { + unsetExpertMode(); + } else { + setExpertMode((Boolean)value); + } + break; + case PRIVILEGES: if (value == null) { unsetPrivileges(); @@ -932,6 +1020,12 @@ public Object getFieldValue(_Fields field) { case TABLE_TYPE: return getTableType(); + case BUCKETING_VERSION: + return getBucketingVersion(); + + case EXPERT_MODE: + return isExpertMode(); + case PRIVILEGES: return getPrivileges(); @@ -979,6 +1073,10 @@ public boolean isSet(_Fields field) { return isSetViewExpandedText(); case TABLE_TYPE: return isSetTableType(); + case BUCKETING_VERSION: + return isSetBucketingVersion(); + case EXPERT_MODE: + return isSetExpertMode(); case PRIVILEGES: return isSetPrivileges(); case TEMPORARY: @@ -1112,6 +1210,24 @@ public boolean equals(Table that) { return false; } + boolean this_present_bucketingVersion = true; + boolean that_present_bucketingVersion = true; + if (this_present_bucketingVersion || that_present_bucketingVersion) { + if (!(this_present_bucketingVersion && that_present_bucketingVersion)) + return false; + if (this.bucketingVersion != that.bucketingVersion) + return false; + } + + boolean this_present_expertMode = true; + boolean that_present_expertMode = true; + if (this_present_expertMode || that_present_expertMode) { + if (!(this_present_expertMode && that_present_expertMode)) + return false; + if (this.expertMode != that.expertMode) + return false; + } + boolean this_present_privileges = true && this.isSetPrivileges(); boolean that_present_privileges = true && that.isSetPrivileges(); if (this_present_privileges || that_present_privileges) { @@ -1215,6 +1331,16 @@ public int hashCode() { if (present_tableType) list.add(tableType); + 
boolean present_bucketingVersion = true; + list.add(present_bucketingVersion); + if (present_bucketingVersion) + list.add(bucketingVersion); + + boolean present_expertMode = true; + list.add(present_expertMode); + if (present_expertMode) + list.add(expertMode); + boolean present_privileges = true && (isSetPrivileges()); list.add(present_privileges); if (present_privileges) @@ -1366,6 +1492,26 @@ public int compareTo(Table other) { return lastComparison; } } + lastComparison = Boolean.valueOf(isSetBucketingVersion()).compareTo(other.isSetBucketingVersion()); + if (lastComparison != 0) { + return lastComparison; + } + if (isSetBucketingVersion()) { + lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.bucketingVersion, other.bucketingVersion); + if (lastComparison != 0) { + return lastComparison; + } + } + lastComparison = Boolean.valueOf(isSetExpertMode()).compareTo(other.isSetExpertMode()); + if (lastComparison != 0) { + return lastComparison; + } + if (isSetExpertMode()) { + lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.expertMode, other.expertMode); + if (lastComparison != 0) { + return lastComparison; + } + } lastComparison = Boolean.valueOf(isSetPrivileges()).compareTo(other.isSetPrivileges()); if (lastComparison != 0) { return lastComparison; @@ -1509,6 +1655,14 @@ public String toString() { sb.append(this.tableType); } first = false; + if (!first) sb.append(", "); + sb.append("bucketingVersion:"); + sb.append(this.bucketingVersion); + first = false; + if (!first) sb.append(", "); + sb.append("expertMode:"); + sb.append(this.expertMode); + first = false; if (isSetPrivileges()) { if (!first) sb.append(", "); sb.append("privileges:"); @@ -1712,7 +1866,23 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, Table struct) throw org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); } break; - case 13: // PRIVILEGES + case 13: // BUCKETING_VERSION + if (schemeField.type == org.apache.thrift.protocol.TType.I32) { + struct.bucketingVersion = iprot.readI32(); + struct.setBucketingVersionIsSet(true); + } else { + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); + } + break; + case 14: // EXPERT_MODE + if (schemeField.type == org.apache.thrift.protocol.TType.BOOL) { + struct.expertMode = iprot.readBool(); + struct.setExpertModeIsSet(true); + } else { + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); + } + break; + case 15: // PRIVILEGES if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { struct.privileges = new PrincipalPrivilegeSet(); struct.privileges.read(iprot); @@ -1721,7 +1891,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, Table struct) throw org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); } break; - case 14: // TEMPORARY + case 16: // TEMPORARY if (schemeField.type == org.apache.thrift.protocol.TType.BOOL) { struct.temporary = iprot.readBool(); struct.setTemporaryIsSet(true); @@ -1729,7 +1899,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, Table struct) throw org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); } break; - case 15: // REWRITE_ENABLED + case 17: // REWRITE_ENABLED if (schemeField.type == org.apache.thrift.protocol.TType.BOOL) { struct.rewriteEnabled = iprot.readBool(); struct.setRewriteEnabledIsSet(true); @@ -1737,7 +1907,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, Table struct) throw org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); } 
break; - case 16: // CREATION_METADATA + case 18: // CREATION_METADATA if (schemeField.type == org.apache.thrift.protocol.TType.MAP) { { org.apache.thrift.protocol.TMap _map197 = iprot.readMapBegin(); @@ -1840,6 +2010,12 @@ public void write(org.apache.thrift.protocol.TProtocol oprot, Table struct) thro oprot.writeString(struct.tableType); oprot.writeFieldEnd(); } + oprot.writeFieldBegin(BUCKETING_VERSION_FIELD_DESC); + oprot.writeI32(struct.bucketingVersion); + oprot.writeFieldEnd(); + oprot.writeFieldBegin(EXPERT_MODE_FIELD_DESC); + oprot.writeBool(struct.expertMode); + oprot.writeFieldEnd(); if (struct.privileges != null) { if (struct.isSetPrivileges()) { oprot.writeFieldBegin(PRIVILEGES_FIELD_DESC); @@ -1926,19 +2102,25 @@ public void write(org.apache.thrift.protocol.TProtocol prot, Table struct) throw if (struct.isSetTableType()) { optionals.set(11); } - if (struct.isSetPrivileges()) { + if (struct.isSetBucketingVersion()) { optionals.set(12); } - if (struct.isSetTemporary()) { + if (struct.isSetExpertMode()) { optionals.set(13); } - if (struct.isSetRewriteEnabled()) { + if (struct.isSetPrivileges()) { optionals.set(14); } - if (struct.isSetCreationMetadata()) { + if (struct.isSetTemporary()) { optionals.set(15); } - oprot.writeBitSet(optionals, 16); + if (struct.isSetRewriteEnabled()) { + optionals.set(16); + } + if (struct.isSetCreationMetadata()) { + optionals.set(17); + } + oprot.writeBitSet(optionals, 18); if (struct.isSetTableName()) { oprot.writeString(struct.tableName); } @@ -1988,6 +2170,12 @@ public void write(org.apache.thrift.protocol.TProtocol prot, Table struct) throw if (struct.isSetTableType()) { oprot.writeString(struct.tableType); } + if (struct.isSetBucketingVersion()) { + oprot.writeI32(struct.bucketingVersion); + } + if (struct.isSetExpertMode()) { + oprot.writeBool(struct.expertMode); + } if (struct.isSetPrivileges()) { struct.privileges.write(oprot); } @@ -2012,7 +2200,7 @@ public void write(org.apache.thrift.protocol.TProtocol prot, Table struct) throw @Override public void read(org.apache.thrift.protocol.TProtocol prot, Table struct) throws org.apache.thrift.TException { TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(16); + BitSet incoming = iprot.readBitSet(18); if (incoming.get(0)) { struct.tableName = iprot.readString(); struct.setTableNameIsSet(true); @@ -2084,19 +2272,27 @@ public void read(org.apache.thrift.protocol.TProtocol prot, Table struct) throws struct.setTableTypeIsSet(true); } if (incoming.get(12)) { + struct.bucketingVersion = iprot.readI32(); + struct.setBucketingVersionIsSet(true); + } + if (incoming.get(13)) { + struct.expertMode = iprot.readBool(); + struct.setExpertModeIsSet(true); + } + if (incoming.get(14)) { struct.privileges = new PrincipalPrivilegeSet(); struct.privileges.read(iprot); struct.setPrivilegesIsSet(true); } - if (incoming.get(13)) { + if (incoming.get(15)) { struct.temporary = iprot.readBool(); struct.setTemporaryIsSet(true); } - if (incoming.get(14)) { + if (incoming.get(16)) { struct.rewriteEnabled = iprot.readBool(); struct.setRewriteEnabledIsSet(true); } - if (incoming.get(15)) { + if (incoming.get(17)) { { org.apache.thrift.protocol.TMap _map214 = new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.STRUCT, iprot.readI32()); struct.creationMetadata = new HashMap(2*_map214.size); diff --git a/standalone-metastore/src/gen/thrift/gen-php/metastore/Types.php b/standalone-metastore/src/gen/thrift/gen-php/metastore/Types.php index 
6878ee1be7..64fca92c51 100644 --- a/standalone-metastore/src/gen/thrift/gen-php/metastore/Types.php +++ b/standalone-metastore/src/gen/thrift/gen-php/metastore/Types.php @@ -5026,6 +5026,14 @@ class Table { * @var string */ public $tableType = null; + /** + * @var int + */ + public $bucketingVersion = null; + /** + * @var bool + */ + public $expertMode = null; /** * @var \metastore\PrincipalPrivilegeSet */ @@ -5109,19 +5117,27 @@ class Table { 'type' => TType::STRING, ), 13 => array( + 'var' => 'bucketingVersion', + 'type' => TType::I32, + ), + 14 => array( + 'var' => 'expertMode', + 'type' => TType::BOOL, + ), + 15 => array( 'var' => 'privileges', 'type' => TType::STRUCT, 'class' => '\metastore\PrincipalPrivilegeSet', ), - 14 => array( + 16 => array( 'var' => 'temporary', 'type' => TType::BOOL, ), - 15 => array( + 17 => array( 'var' => 'rewriteEnabled', 'type' => TType::BOOL, ), - 16 => array( + 18 => array( 'var' => 'creationMetadata', 'type' => TType::MAP, 'ktype' => TType::STRING, @@ -5173,6 +5189,12 @@ class Table { if (isset($vals['tableType'])) { $this->tableType = $vals['tableType']; } + if (isset($vals['bucketingVersion'])) { + $this->bucketingVersion = $vals['bucketingVersion']; + } + if (isset($vals['expertMode'])) { + $this->expertMode = $vals['expertMode']; + } if (isset($vals['privileges'])) { $this->privileges = $vals['privileges']; } @@ -5317,6 +5339,20 @@ class Table { } break; case 13: + if ($ftype == TType::I32) { + $xfer += $input->readI32($this->bucketingVersion); + } else { + $xfer += $input->skip($ftype); + } + break; + case 14: + if ($ftype == TType::BOOL) { + $xfer += $input->readBool($this->expertMode); + } else { + $xfer += $input->skip($ftype); + } + break; + case 15: if ($ftype == TType::STRUCT) { $this->privileges = new \metastore\PrincipalPrivilegeSet(); $xfer += $this->privileges->read($input); @@ -5324,21 +5360,21 @@ class Table { $xfer += $input->skip($ftype); } break; - case 14: + case 16: if ($ftype == TType::BOOL) { $xfer += $input->readBool($this->temporary); } else { $xfer += $input->skip($ftype); } break; - case 15: + case 17: if ($ftype == TType::BOOL) { $xfer += $input->readBool($this->rewriteEnabled); } else { $xfer += $input->skip($ftype); } break; - case 16: + case 18: if ($ftype == TType::MAP) { $this->creationMetadata = array(); $_size181 = 0; @@ -5460,21 +5496,31 @@ class Table { $xfer += $output->writeString($this->tableType); $xfer += $output->writeFieldEnd(); } + if ($this->bucketingVersion !== null) { + $xfer += $output->writeFieldBegin('bucketingVersion', TType::I32, 13); + $xfer += $output->writeI32($this->bucketingVersion); + $xfer += $output->writeFieldEnd(); + } + if ($this->expertMode !== null) { + $xfer += $output->writeFieldBegin('expertMode', TType::BOOL, 14); + $xfer += $output->writeBool($this->expertMode); + $xfer += $output->writeFieldEnd(); + } if ($this->privileges !== null) { if (!is_object($this->privileges)) { throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); } - $xfer += $output->writeFieldBegin('privileges', TType::STRUCT, 13); + $xfer += $output->writeFieldBegin('privileges', TType::STRUCT, 15); $xfer += $this->privileges->write($output); $xfer += $output->writeFieldEnd(); } if ($this->temporary !== null) { - $xfer += $output->writeFieldBegin('temporary', TType::BOOL, 14); + $xfer += $output->writeFieldBegin('temporary', TType::BOOL, 16); $xfer += $output->writeBool($this->temporary); $xfer += $output->writeFieldEnd(); } if ($this->rewriteEnabled !== null) { - $xfer += 
$output->writeFieldBegin('rewriteEnabled', TType::BOOL, 15); + $xfer += $output->writeFieldBegin('rewriteEnabled', TType::BOOL, 17); $xfer += $output->writeBool($this->rewriteEnabled); $xfer += $output->writeFieldEnd(); } @@ -5482,7 +5528,7 @@ class Table { if (!is_array($this->creationMetadata)) { throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); } - $xfer += $output->writeFieldBegin('creationMetadata', TType::MAP, 16); + $xfer += $output->writeFieldBegin('creationMetadata', TType::MAP, 18); { $output->writeMapBegin(TType::STRING, TType::STRUCT, count($this->creationMetadata)); { diff --git a/standalone-metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py b/standalone-metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py index 25e9a889b2..592d7b0aba 100644 --- a/standalone-metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py +++ b/standalone-metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py @@ -3464,6 +3464,8 @@ class Table: - viewOriginalText - viewExpandedText - tableType + - bucketingVersion + - expertMode - privileges - temporary - rewriteEnabled @@ -3484,13 +3486,15 @@ class Table: (10, TType.STRING, 'viewOriginalText', None, None, ), # 10 (11, TType.STRING, 'viewExpandedText', None, None, ), # 11 (12, TType.STRING, 'tableType', None, None, ), # 12 - (13, TType.STRUCT, 'privileges', (PrincipalPrivilegeSet, PrincipalPrivilegeSet.thrift_spec), None, ), # 13 - (14, TType.BOOL, 'temporary', None, False, ), # 14 - (15, TType.BOOL, 'rewriteEnabled', None, None, ), # 15 - (16, TType.MAP, 'creationMetadata', (TType.STRING,None,TType.STRUCT,(BasicTxnInfo, BasicTxnInfo.thrift_spec)), None, ), # 16 + (13, TType.I32, 'bucketingVersion', None, None, ), # 13 + (14, TType.BOOL, 'expertMode', None, None, ), # 14 + (15, TType.STRUCT, 'privileges', (PrincipalPrivilegeSet, PrincipalPrivilegeSet.thrift_spec), None, ), # 15 + (16, TType.BOOL, 'temporary', None, False, ), # 16 + (17, TType.BOOL, 'rewriteEnabled', None, None, ), # 17 + (18, TType.MAP, 'creationMetadata', (TType.STRING,None,TType.STRUCT,(BasicTxnInfo, BasicTxnInfo.thrift_spec)), None, ), # 18 ) - def __init__(self, tableName=None, dbName=None, owner=None, createTime=None, lastAccessTime=None, retention=None, sd=None, partitionKeys=None, parameters=None, viewOriginalText=None, viewExpandedText=None, tableType=None, privileges=None, temporary=thrift_spec[14][4], rewriteEnabled=None, creationMetadata=None,): + def __init__(self, tableName=None, dbName=None, owner=None, createTime=None, lastAccessTime=None, retention=None, sd=None, partitionKeys=None, parameters=None, viewOriginalText=None, viewExpandedText=None, tableType=None, bucketingVersion=None, expertMode=None, privileges=None, temporary=thrift_spec[16][4], rewriteEnabled=None, creationMetadata=None,): self.tableName = tableName self.dbName = dbName self.owner = owner @@ -3503,6 +3507,8 @@ def __init__(self, tableName=None, dbName=None, owner=None, createTime=None, las self.viewOriginalText = viewOriginalText self.viewExpandedText = viewExpandedText self.tableType = tableType + self.bucketingVersion = bucketingVersion + self.expertMode = expertMode self.privileges = privileges self.temporary = temporary self.rewriteEnabled = rewriteEnabled @@ -3591,22 +3597,32 @@ def read(self, iprot): else: iprot.skip(ftype) elif fid == 13: + if ftype == TType.I32: + self.bucketingVersion = iprot.readI32() + else: + iprot.skip(ftype) + elif fid == 14: + if ftype == TType.BOOL: + self.expertMode = iprot.readBool() + else: + iprot.skip(ftype) + 
elif fid == 15: if ftype == TType.STRUCT: self.privileges = PrincipalPrivilegeSet() self.privileges.read(iprot) else: iprot.skip(ftype) - elif fid == 14: + elif fid == 16: if ftype == TType.BOOL: self.temporary = iprot.readBool() else: iprot.skip(ftype) - elif fid == 15: + elif fid == 17: if ftype == TType.BOOL: self.rewriteEnabled = iprot.readBool() else: iprot.skip(ftype) - elif fid == 16: + elif fid == 18: if ftype == TType.MAP: self.creationMetadata = {} (_ktype182, _vtype183, _size181 ) = iprot.readMapBegin() @@ -3683,20 +3699,28 @@ def write(self, oprot): oprot.writeFieldBegin('tableType', TType.STRING, 12) oprot.writeString(self.tableType) oprot.writeFieldEnd() + if self.bucketingVersion is not None: + oprot.writeFieldBegin('bucketingVersion', TType.I32, 13) + oprot.writeI32(self.bucketingVersion) + oprot.writeFieldEnd() + if self.expertMode is not None: + oprot.writeFieldBegin('expertMode', TType.BOOL, 14) + oprot.writeBool(self.expertMode) + oprot.writeFieldEnd() if self.privileges is not None: - oprot.writeFieldBegin('privileges', TType.STRUCT, 13) + oprot.writeFieldBegin('privileges', TType.STRUCT, 15) self.privileges.write(oprot) oprot.writeFieldEnd() if self.temporary is not None: - oprot.writeFieldBegin('temporary', TType.BOOL, 14) + oprot.writeFieldBegin('temporary', TType.BOOL, 16) oprot.writeBool(self.temporary) oprot.writeFieldEnd() if self.rewriteEnabled is not None: - oprot.writeFieldBegin('rewriteEnabled', TType.BOOL, 15) + oprot.writeFieldBegin('rewriteEnabled', TType.BOOL, 17) oprot.writeBool(self.rewriteEnabled) oprot.writeFieldEnd() if self.creationMetadata is not None: - oprot.writeFieldBegin('creationMetadata', TType.MAP, 16) + oprot.writeFieldBegin('creationMetadata', TType.MAP, 18) oprot.writeMapBegin(TType.STRING, TType.STRUCT, len(self.creationMetadata)) for kiter191,viter192 in self.creationMetadata.items(): oprot.writeString(kiter191) @@ -3724,6 +3748,8 @@ def __hash__(self): value = (value * 31) ^ hash(self.viewOriginalText) value = (value * 31) ^ hash(self.viewExpandedText) value = (value * 31) ^ hash(self.tableType) + value = (value * 31) ^ hash(self.bucketingVersion) + value = (value * 31) ^ hash(self.expertMode) value = (value * 31) ^ hash(self.privileges) value = (value * 31) ^ hash(self.temporary) value = (value * 31) ^ hash(self.rewriteEnabled) diff --git a/standalone-metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb b/standalone-metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb index 3a11a0582a..d7108cfee2 100644 --- a/standalone-metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb +++ b/standalone-metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb @@ -806,10 +806,12 @@ class Table VIEWORIGINALTEXT = 10 VIEWEXPANDEDTEXT = 11 TABLETYPE = 12 - PRIVILEGES = 13 - TEMPORARY = 14 - REWRITEENABLED = 15 - CREATIONMETADATA = 16 + BUCKETINGVERSION = 13 + EXPERTMODE = 14 + PRIVILEGES = 15 + TEMPORARY = 16 + REWRITEENABLED = 17 + CREATIONMETADATA = 18 FIELDS = { TABLENAME => {:type => ::Thrift::Types::STRING, :name => 'tableName'}, @@ -824,6 +826,8 @@ class Table VIEWORIGINALTEXT => {:type => ::Thrift::Types::STRING, :name => 'viewOriginalText'}, VIEWEXPANDEDTEXT => {:type => ::Thrift::Types::STRING, :name => 'viewExpandedText'}, TABLETYPE => {:type => ::Thrift::Types::STRING, :name => 'tableType'}, + BUCKETINGVERSION => {:type => ::Thrift::Types::I32, :name => 'bucketingVersion'}, + EXPERTMODE => {:type => ::Thrift::Types::BOOL, :name => 'expertMode'}, PRIVILEGES => {:type => ::Thrift::Types::STRUCT, :name => 'privileges', :class => 
::PrincipalPrivilegeSet, :optional => true}, TEMPORARY => {:type => ::Thrift::Types::BOOL, :name => 'temporary', :default => false, :optional => true}, REWRITEENABLED => {:type => ::Thrift::Types::BOOL, :name => 'rewriteEnabled', :optional => true}, diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java index b3d99a1da5..e09bb68d92 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java @@ -1635,7 +1635,8 @@ private Table convertToTable(MTable mtbl) throws MetaException { .getOwner(), mtbl.getCreateTime(), mtbl.getLastAccessTime(), mtbl .getRetention(), convertToStorageDescriptor(mtbl.getSd()), convertToFieldSchemas(mtbl.getPartitionKeys()), convertMap(mtbl.getParameters()), - mtbl.getViewOriginalText(), mtbl.getViewExpandedText(), tableType); + mtbl.getViewOriginalText(), mtbl.getViewExpandedText(), tableType, + mtbl.getBucketingVersion(), mtbl.isExpertMode()); t.setCreationMetadata(convertToCreationMetadata(mtbl.getCreationMetadata())); t.setRewriteEnabled(mtbl.isRewriteEnabled()); return t; @@ -1676,7 +1677,8 @@ private MTable convertToMTable(Table tbl) throws InvalidObjectException, .getCreateTime(), tbl.getLastAccessTime(), tbl.getRetention(), convertToMFieldSchemas(tbl.getPartitionKeys()), tbl.getParameters(), tbl.getViewOriginalText(), tbl.getViewExpandedText(), tbl.isRewriteEnabled(), - convertToMCreationMetadata(tbl.getCreationMetadata()), tableType); + convertToMCreationMetadata(tbl.getCreationMetadata()), tableType, + tbl.getBucketingVersion(), tbl.isExpertMode()); } private List convertToMFieldSchemas(List keys) { diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/client/builder/TableBuilder.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/client/builder/TableBuilder.java index 69acf3cfff..aa147dd9f1 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/client/builder/TableBuilder.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/client/builder/TableBuilder.java @@ -39,9 +39,9 @@ public class TableBuilder extends StorageDescriptorBuilder { private String dbName, tableName, owner, viewOriginalText, viewExpandedText, type; private List partCols; - private int createTime, lastAccessTime, retention; + private int createTime, lastAccessTime, retention, bucketingVersion; private Map tableParams; - private boolean rewriteEnabled, temporary; + private boolean rewriteEnabled, temporary, expertMode; public TableBuilder() { // Set some reasonable defaults @@ -145,6 +145,16 @@ public TableBuilder fromIndex(Index index) { return this; } + public TableBuilder setBucketingVersion(int bucketingVersion) { + this.bucketingVersion = bucketingVersion; + return this; + } + + public TableBuilder setExpertMode(boolean expertMode) { + this.expertMode = expertMode; + return this; + } + public Table build() throws MetaException { if (dbName == null || tableName == null) { throw new MetaException("You must set the database and table name"); @@ -157,7 +167,8 @@ public Table build() throws MetaException { } } Table t = new Table(tableName, dbName, owner, createTime, lastAccessTime, retention, buildSd(), - partCols, tableParams, viewOriginalText, viewExpandedText, type); + partCols, tableParams, viewOriginalText, viewExpandedText, type, 
bucketingVersion, + expertMode); if (rewriteEnabled) t.setRewriteEnabled(true); if (temporary) t.setTemporary(temporary); return t; diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/model/MTable.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/model/MTable.java index 6c40ae8753..9352775634 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/model/MTable.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/model/MTable.java @@ -37,6 +37,8 @@ private boolean rewriteEnabled; private Map creationMetadata; private String tableType; + private int bucketingVersion; + private boolean expertMode; public MTable() {} @@ -58,7 +60,7 @@ public MTable(String tableName, MDatabase database, MStorageDescriptor sd, Strin int createTime, int lastAccessTime, int retention, List partitionKeys, Map parameters, String viewOriginalText, String viewExpandedText, boolean rewriteEnabled, Map creationMetadata, - String tableType) { + String tableType, int bucketingVersion, boolean expertMode) { this.tableName = tableName; this.database = database; this.sd = sd; @@ -73,6 +75,8 @@ public MTable(String tableName, MDatabase database, MStorageDescriptor sd, Strin this.rewriteEnabled = rewriteEnabled; this.creationMetadata = creationMetadata; this.tableType = tableType; + this.bucketingVersion = bucketingVersion; + this.expertMode = expertMode; } /** @@ -270,4 +274,18 @@ public void setTableType(String tableType) { public String getTableType() { return tableType; } + + /** + * @return the bucketingVersion + */ + public int getBucketingVersion() { + return bucketingVersion; + } + + /** + * @return the expertMode + */ + public boolean isExpertMode() { + return expertMode; + } } diff --git a/standalone-metastore/src/main/thrift/hive_metastore.thrift b/standalone-metastore/src/main/thrift/hive_metastore.thrift index 93f3e53de2..b2a1ccf161 100644 --- a/standalone-metastore/src/main/thrift/hive_metastore.thrift +++ b/standalone-metastore/src/main/thrift/hive_metastore.thrift @@ -324,10 +324,12 @@ struct Table { 10: string viewOriginalText, // original view text, null for non-view 11: string viewExpandedText, // expanded view text, null for non-view 12: string tableType, // table type enum, e.g. EXTERNAL_TABLE - 13: optional PrincipalPrivilegeSet privileges, - 14: optional bool temporary=false, - 15: optional bool rewriteEnabled, // rewrite enabled or not - 16: optional map creationMetadata // only for MVs, it stores table name used -> last modification before MV creation + 13: i32 bucketingVersion, // For bucketed tables only. Defaults to 1 for existing tables and 2 for new tables. + 14: bool expertMode, // For bucketed tables only. Default: false; set to true when the user loads data with the "load data" command.
+ 15: optional PrincipalPrivilegeSet privileges, + 16: optional bool temporary=false, + 17: optional bool rewriteEnabled, // rewrite enabled or not + 18: optional map creationMetadata // only for MVs, it stores table name used -> last modification before MV creation } struct Partition { diff --git a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStorePartitionSpecs.java b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStorePartitionSpecs.java index 57e5a4126e..f459509359 100644 --- a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStorePartitionSpecs.java +++ b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStorePartitionSpecs.java @@ -129,7 +129,8 @@ private static void createTable(HiveMetaStoreClient hmsc, boolean enablePartitio Map tableParameters = new HashMap<>(); tableParameters.put("hive.hcatalog.partition.spec.grouping.enabled", enablePartitionGrouping? "true":"false"); - Table table = new Table(tableName, dbName, "", 0, 0, 0, storageDescriptor, partColumns, tableParameters, "", "", ""); + Table table = new Table(tableName, dbName, "", 0, 0, 0, storageDescriptor, + partColumns, tableParameters, "", "", "", 2, false); hmsc.createTable(table); Assert.assertTrue("Table " + dbName + "." + tableName + " does not exist", diff --git a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestObjectStore.java b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestObjectStore.java index 372dee6369..bc9c8755e3 100644 --- a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestObjectStore.java +++ b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestObjectStore.java @@ -176,7 +176,7 @@ public void testTableOps() throws MetaException, InvalidObjectException, NoSuchO HashMap params = new HashMap<>(); params.put("EXTERNAL", "false"); Table tbl1 = - new Table(TABLE1, DB1, "owner", 1, 2, 3, sd1, null, params, null, null, "MANAGED_TABLE"); + new Table(TABLE1, DB1, "owner", 1, 2, 3, sd1, null, params, null, null, "MANAGED_TABLE", 2, false); objectStore.createTable(tbl1); List tables = objectStore.getAllTables(DB1); @@ -188,7 +188,7 @@ public void testTableOps() throws MetaException, InvalidObjectException, NoSuchO "location", null, null, false, 0, new SerDeInfo("SerDeName", "serializationLib", null), null, null, null); Table newTbl1 = new Table("new" + TABLE1, DB1, "owner", 1, 2, 3, sd2, null, params, null, null, - "MANAGED_TABLE"); + "MANAGED_TABLE", 2, false); objectStore.alterTable(DB1, TABLE1, newTbl1); tables = objectStore.getTables(DB1, "new*"); Assert.assertEquals(1, tables.size()); @@ -259,7 +259,7 @@ public void testPartitionOps() throws MetaException, InvalidObjectException, FieldSchema partitionKey2 = new FieldSchema("State", ColumnType.STRING_TYPE_NAME, ""); Table tbl1 = new Table(TABLE1, DB1, "owner", 1, 2, 3, sd, Arrays.asList(partitionKey1, partitionKey2), - tableParams, null, null, "MANAGED_TABLE"); + tableParams, null, null, "MANAGED_TABLE", 2, false); objectStore.createTable(tbl1); HashMap partitionParams = new HashMap<>(); partitionParams.put("PARTITION_LEVEL_PRIVILEGE", "true"); diff --git a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestOldSchema.java b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestOldSchema.java index 6a44833a67..0761be961f 100644 --- 
a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestOldSchema.java +++ b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestOldSchema.java @@ -129,7 +129,7 @@ public void testPartitionOps() throws Exception { List partCols = new ArrayList<>(); partCols.add(new FieldSchema("ds", "string", "")); Table table = new Table(tableName, dbName, "me", (int) now, (int) now, 0, sd, partCols, - Collections.emptyMap(), null, null, null); + Collections.emptyMap(), null, null, null, 2, false); store.createTable(table); Deadline.startTimer("getPartition"); diff --git a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/cache/TestCachedStore.java b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/cache/TestCachedStore.java index b9a8f61c69..af262c96f7 100644 --- a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/cache/TestCachedStore.java +++ b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/cache/TestCachedStore.java @@ -207,7 +207,7 @@ public void testTableOps() throws Exception { sd.setStoredAsSubDirectories(false); Table tbl = new Table(tblName, dbName, tblOwner, 0, 0, 0, sd, new ArrayList<>(), tblParams, - null, null, TableType.MANAGED_TABLE.toString()); + null, null, TableType.MANAGED_TABLE.toString(), 2, false); objectStore.createTable(tbl); tbl = objectStore.getTable(dbName, tblName); @@ -224,7 +224,7 @@ public void testTableOps() throws Exception { String tblName1 = "tbl1"; Table tbl1 = new Table(tblName1, dbName, tblOwner, 0, 0, 0, sd, new ArrayList<>(), tblParams, - null, null, TableType.MANAGED_TABLE.toString()); + null, null, TableType.MANAGED_TABLE.toString(), 2, false); tbl1.setCreationMetadata(new HashMap()); cachedStore.createTable(tbl1); tbl1 = cachedStore.getTable(dbName, tblName1); @@ -237,7 +237,7 @@ public void testTableOps() throws Exception { String tblName2 = "tbl2"; Table tbl2 = new Table(tblName2, dbName, tblOwner, 0, 0, 0, sd, new ArrayList<>(), tblParams, - null, null, TableType.MANAGED_TABLE.toString()); + null, null, TableType.MANAGED_TABLE.toString(), 2, false); objectStore.createTable(tbl2); tbl2 = objectStore.getTable(dbName, tblName2); @@ -245,7 +245,7 @@ public void testTableOps() throws Exception { tblOwner = "user2"; tbl = new Table(tblName, dbName, tblOwner, 0, 0, 0, sd, new ArrayList<>(), tblParams, - null, null, TableType.MANAGED_TABLE.toString()); + null, null, TableType.MANAGED_TABLE.toString(), 2, false); objectStore.alterTable(dbName, tblName, tbl); tbl = objectStore.getTable(dbName, tblName); @@ -314,7 +314,7 @@ public void testPartitionOps() throws Exception { ptnCols.add(ptnCol1); Table tbl = new Table(tblName, dbName, tblOwner, 0, 0, 0, sd, ptnCols, tblParams, null, null, - TableType.MANAGED_TABLE.toString()); + TableType.MANAGED_TABLE.toString(), 2, false); objectStore.createTable(tbl); tbl = objectStore.getTable(dbName, tblName); final String ptnColVal1 = "aaa"; @@ -428,7 +428,7 @@ public void testTableColStatsOps() throws Exception { null, serdeParams); Table tbl = new Table(tblName, dbName, tblOwner, 0, 0, 0, sd, new ArrayList<>(), tblParams, - null, null, TableType.MANAGED_TABLE.toString()); + null, null, TableType.MANAGED_TABLE.toString(), 2, false); objectStore.createTable(tbl); tbl = objectStore.getTable(dbName, tblName); @@ -701,7 +701,7 @@ public void testAggrStatsRepeatedRead() throws Exception { Table tbl = new Table(tblName, dbName, null, 0, 0, 0, sd, partCols, new HashMap<>(), - null, null, TableType.MANAGED_TABLE.toString()); +
null, null, TableType.MANAGED_TABLE.toString(), 2, false); cachedStore.createTable(tbl); List partVals1 = new ArrayList<>(); @@ -767,7 +767,7 @@ public void testPartitionAggrStats() throws Exception { Table tbl = new Table(tblName, dbName, null, 0, 0, 0, sd, partCols, new HashMap<>(), - null, null, TableType.MANAGED_TABLE.toString()); + null, null, TableType.MANAGED_TABLE.toString(), 2, false); cachedStore.createTable(tbl); List partVals1 = new ArrayList<>(); @@ -837,7 +837,7 @@ public void testPartitionAggrStatsBitVector() throws Exception { Table tbl = new Table(tblName, dbName, null, 0, 0, 0, sd, partCols, new HashMap<>(), - null, null, TableType.MANAGED_TABLE.toString()); + null, null, TableType.MANAGED_TABLE.toString(), 2, false); cachedStore.createTable(tbl); List partVals1 = new ArrayList<>();
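
Note for callers of this API: because bucketingVersion (field 13) and expertMode (field 14) are inserted ahead of the optional block, the optional fields privileges, temporary, rewriteEnabled, and creationMetadata shift from Thrift IDs 13-16 to 15-18 in every generated binding above, and each positional call to the Java Table constructor gains two trailing arguments, as the test updates show. A minimal sketch of the new call shape, with placeholder values, a bare StorageDescriptor (real callers populate columns, location, and serde info), and import paths assumed from the metastore API:

import java.util.ArrayList;
import java.util.HashMap;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;

public class TableCtorSketch {
  public static void main(String[] args) {
    // Placeholder SD; the tests above build one with columns, location, and serde info.
    StorageDescriptor sd = new StorageDescriptor();
    Table tbl = new Table("test_tbl", "test_db", "owner",
        0, 0, 0,                            // createTime, lastAccessTime, retention
        sd,
        new ArrayList<>(),                  // partitionKeys
        new HashMap<>(),                    // parameters
        null, null,                         // viewOriginalText / viewExpandedText (non-view)
        TableType.MANAGED_TABLE.toString(),
        2,                                  // bucketingVersion: 2 for new tables
        false);                             // expertMode: stays false unless "load data" is used
    System.out.println(tbl.getBucketingVersion() + " " + tbl.isExpertMode());
  }
}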
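
For new code, the TableBuilder additions cover the same two fields fluently; if the setters are skipped, the builder leaves them at Java defaults (0 and false), since the new fields are not initialized in the constructor. A hedged sketch, assuming the builder's pre-existing setDbName, setTableName, and addCol methods (defined or inherited outside this hunk, so they are assumptions here), plus the two setters added above:

import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.metastore.client.builder.TableBuilder;

public class TableBuilderSketch {
  public static Table newBucketedTable() throws MetaException {
    return new TableBuilder()
        .setDbName("test_db")        // assumed existing setter
        .setTableName("test_tbl")    // assumed existing setter
        .addCol("id", "int")         // assumed, from the parent StorageDescriptorBuilder
        .setBucketingVersion(2)      // added in this patch
        .setExpertMode(false)        // added in this patch
        .build();                    // per this patch, throws MetaException if db or table name is unset
  }
}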