diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 65ec1b9..df295b1 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -731,6 +731,10 @@ public void setSparkConfigUpdated(boolean isSparkConfigUpdated) { HIVEHYBRIDGRACEHASHJOINMEMCHECKFREQ("hive.mapjoin.hybridgrace.memcheckfrequency", 1024, "For " + "hybrid grace hash join, how often (how many rows apart) we check if memory is full. " + "This number should be power of 2."), + HIVEHYBRIDGRACEHASHJOINMINWBSIZE("hive.mapjoin.hybridgrace.minwbsize", 512, "For hybrid grace" + + " hash join, the minimum write buffer size used by optimized hashtable."), + HIVEHYBRIDGRACEHASHJOINMINNUMPARTITIONS("hive.mapjoin.hybridgrace.minnumpartitions", 16, "For" + + " hybrid grace hash join, the minimum number of partitions to create."), HIVEHASHTABLEWBSIZE("hive.mapjoin.optimized.hashtable.wbsize", 10 * 1024 * 1024, "Optimized hashtable (see hive.mapjoin.optimized.hashtable) uses a chain of buffers to\n" + "store data. This is one buffer size. HT may be slightly faster if this is larger, but for small\n" + diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 0a5d839..b74f5c0 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -299,7 +299,8 @@ minitez.query.files=bucket_map_join_tez1.q,\ dynamic_partition_pruning_2.q,\ explainuser_1.q,\ explainuser_2.q,\ - hybridhashjoin.q,\ + hybridgrace_hashjoin_1.q,\ + hybridgrace_hashjoin_2.q,\ mapjoin_decimal.q,\ lvj_mapjoin.q, \ mrr.q,\ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java index 38d72f5..2037ea6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java @@ -89,9 +89,9 @@ private UnwrapRowContainer[] unwrapContainer; private transient Configuration hconf; private transient boolean hybridMapJoinLeftover; // whether there's spilled data to be processed - private transient MapJoinBytesTableContainer currentSmallTable; // reloaded hashmap from disk - private transient int tag; // big table alias - private transient int smallTable; // small table alias + private transient MapJoinBytesTableContainer[] spilledMapJoinTables; // + protected HybridHashTableContainer biggestSmallTable; // The biggest small table + // Only this container has spilled big table rows public MapJoinOperator() { } @@ -268,7 +268,6 @@ public void generateMapMetaData() throws HiveException { private Pair loadHashTable( ExecMapperContext mapContext, MapredContext mrContext) throws HiveException { - loadCalled = true; if (this.hashTblInitedOnce @@ -321,18 +320,6 @@ protected MapJoinKey getRefKey(byte alias) { @Override public void process(Object row, int tag) throws HiveException { - this.tag = tag; - - // As we're calling processOp again to process the leftover triplets, we know the "row" is - // coming from the on-disk matchfile. 
We need to recreate hashMapRowGetter against new hashtable - if (hybridMapJoinLeftover) { - assert hashMapRowGetters != null; - if (hashMapRowGetters[smallTable] == null) { - MapJoinKey refKey = getRefKey((byte) tag); - hashMapRowGetters[smallTable] = currentSmallTable.createGetter(refKey); - } - } - try { alias = (byte) tag; if (hashMapRowGetters == null) { @@ -345,13 +332,24 @@ public void process(Object row, int tag) throws HiveException { } } + // As we're calling processOp again to process the leftover triplets, we know the "row" is + // coming from the spilled matchfile. We need to recreate hashMapRowGetter against new hashtables + if (hybridMapJoinLeftover) { + MapJoinKey refKey = getRefKey(alias); + for (byte pos = 0; pos < order.length; pos++) { + if (pos != alias && spilledMapJoinTables[pos] != null) { + hashMapRowGetters[pos] = spilledMapJoinTables[pos].createGetter(refKey); + } + } + } + // compute keys and values as StandardObjects ReusableGetAdaptor firstSetKey = null; int fieldCount = joinKeys[alias].size(); boolean joinNeeded = false; + boolean bigTableRowSpilled = false; for (byte pos = 0; pos < order.length; pos++) { if (pos != alias) { - smallTable = pos; // record small table alias JoinUtil.JoinResult joinResult; ReusableGetAdaptor adaptor; if (firstSetKey == null) { @@ -386,8 +384,10 @@ public void process(Object row, int tag) throws HiveException { aliasFilterTags[pos] = rowContainer.getAliasFilter(); } // Spill the big table rows into appropriate partition - if (joinResult == JoinUtil.JoinResult.SPILL) { + if (joinResult == JoinUtil.JoinResult.SPILL && + !bigTableRowSpilled) { // For n-way join, only spill big table rows once spillBigTableRow(mapJoinTables[pos], row); + bigTableRowSpilled = true; } } } @@ -427,7 +427,6 @@ protected void spillBigTableRow(MapJoinTableContainer hybridHtContainer, Object @Override public void closeOp(boolean abort) throws HiveException { - boolean spilled = false; for (MapJoinTableContainer container: mapJoinTables) { if (container != null) { @@ -436,42 +435,64 @@ public void closeOp(boolean abort) throws HiveException { } } + // For Hybrid Grace Hash Join, we need to see if there is any spilled data to be processed next if (spilled) { - for (MapJoinTableContainer tableContainer : mapJoinTables) { - if (tableContainer != null) { - if (tableContainer instanceof HybridHashTableContainer) { - HybridHashTableContainer hybridHtContainer = (HybridHashTableContainer) tableContainer; - hybridHtContainer.dumpStats(); - - HashPartition[] hashPartitions = hybridHtContainer.getHashPartitions(); - // Clear all in memory partitions first - for (int i = 0; i < hashPartitions.length; i++) { - if (!hashPartitions[i].isHashMapOnDisk()) { - hybridHtContainer.setTotalInMemRowCount( - hybridHtContainer.getTotalInMemRowCount() - - hashPartitions[i].getHashMapFromMemory().getNumValues()); - hashPartitions[i].getHashMapFromMemory().clear(); - } - } - assert hybridHtContainer.getTotalInMemRowCount() == 0; - - for (int i = 0; i < hashPartitions.length; i++) { - if (hashPartitions[i].isHashMapOnDisk()) { - // Recursively process on-disk triplets (hash partition, sidefile, matchfile) - try { - hybridMapJoinLeftover = true; - hashMapRowGetters[smallTable] = null; - continueProcess(hashPartitions[i], hybridHtContainer); - } catch (IOException e) { - e.printStackTrace(); - } catch (ClassNotFoundException e) { - e.printStackTrace(); - } catch (SerDeException e) { - e.printStackTrace(); + int numPartitions = 0; + // Find out number of partitions for each small table 
(should be same across tables) + for (byte pos = 0; pos < mapJoinTables.length; pos++) { + if (pos != alias) { + biggestSmallTable = (HybridHashTableContainer)mapJoinTables[pos]; + numPartitions = biggestSmallTable.getHashPartitions().length; + break; + } + } + assert numPartitions != 0 : "Number of partitions must be greater than 0!"; + + if (biggestSmallTable.hasSpill()) { + spilledMapJoinTables = new MapJoinBytesTableContainer[mapJoinTables.length]; + hybridMapJoinLeftover = true; + + // Clear all in-memory partitions first + for (byte pos = 0; pos < mapJoinTables.length; pos++) { + MapJoinTableContainer tableContainer = mapJoinTables[pos]; + if (tableContainer != null) { + tableContainer.dumpMetrics(); + + if (tableContainer instanceof HybridHashTableContainer) { + HybridHashTableContainer hybridHtContainer = (HybridHashTableContainer) tableContainer; + hybridHtContainer.dumpStats(); + + HashPartition[] hashPartitions = hybridHtContainer.getHashPartitions(); + // Clear all in memory partitions first + for (int i = 0; i < hashPartitions.length; i++) { + if (!hashPartitions[i].isHashMapOnDisk()) { + hybridHtContainer.setTotalInMemRowCount( + hybridHtContainer.getTotalInMemRowCount() - + hashPartitions[i].getHashMapFromMemory().getNumValues()); + hashPartitions[i].getHashMapFromMemory().clear(); } } - hybridMapJoinLeftover = false; - currentSmallTable = null; + assert hybridHtContainer.getTotalInMemRowCount() == 0; + } + } + } + + // Reprocess the spilled data + for (int i = 0; i < numPartitions; i++) { + HashPartition[] hashPartitions = biggestSmallTable.getHashPartitions(); + if (hashPartitions[i].isHashMapOnDisk()) { + try { + continueProcess(i); // Re-process spilled data + } catch (IOException e) { + e.printStackTrace(); + } catch (SerDeException e) { + e.printStackTrace(); + } catch (ClassNotFoundException e) { + e.printStackTrace(); + } + for (byte pos = 0; pos < order.length; pos++) { + if (pos != alias) + spilledMapJoinTables[pos] = null; } } } @@ -492,18 +513,21 @@ public void closeOp(boolean abort) throws HiveException { } /** - * Continue processing each pair of spilled hashtable and big table row container - * @param partition hash partition to process - * @param hybridHtContainer Hybrid hashtable container + * Continue processing join between spilled hashtable(s) and spilled big table + * @param partitionId the partition number across all small tables to process * @throws HiveException * @throws IOException - * @throws ClassNotFoundException * @throws SerDeException */ - private void continueProcess(HashPartition partition, HybridHashTableContainer hybridHtContainer) - throws HiveException, IOException, ClassNotFoundException, SerDeException { - reloadHashTable(partition, hybridHtContainer); - reProcessBigTable(partition); + private void continueProcess(int partitionId) + throws HiveException, IOException, SerDeException, ClassNotFoundException { + for (byte pos = 0; pos < mapJoinTables.length; pos++) { + if (pos != alias) { + reloadHashTable(pos, partitionId); + } + } + + reProcessBigTable(partitionId); } /** @@ -511,16 +535,16 @@ private void continueProcess(HashPartition partition, HybridHashTableContainer h * It can have two steps: * 1) Deserialize a serialized hash table, and * 2) Merge every key/value pair from small table container into the hash table - * @param partition hash partition to process - * @param hybridHtContainer Hybrid hashtable container + * @param pos position of small table + * @param partitionId the partition of the small table to be reloaded from * 
@throws IOException
-   * @throws ClassNotFoundException
    * @throws HiveException
    * @throws SerDeException
    */
-  private void reloadHashTable(HashPartition partition,
-                               HybridHashTableContainer hybridHtContainer)
-      throws IOException, ClassNotFoundException, HiveException, SerDeException {
+  private void reloadHashTable(byte pos, int partitionId)
+      throws IOException, HiveException, SerDeException, ClassNotFoundException {
+    HybridHashTableContainer container = (HybridHashTableContainer)mapJoinTables[pos];
+    HashPartition partition = container.getHashPartitions()[partitionId];
 
     // Merge the sidefile into the newly created hash table
     // This is where the spilling may happen again
@@ -539,11 +563,12 @@ private void reloadHashTable(HashPartition partition,
     // If based on the new key count, keyCount is smaller than a threshold,
     // then just load the entire restored hashmap into memory.
     // The size of deserialized partition shouldn't exceed half of memory limit
-    if (rowCount * hybridHtContainer.getTableRowSize() >= hybridHtContainer.getMemoryThreshold() / 2) {
-      LOG.info("Hybrid Grace Hash Join: Hash table reload can fail since it will be greater than memory limit. Recursive spilling is currently not supported");
+    if (rowCount * container.getTableRowSize() >= container.getMemoryThreshold() / 2) {
+      throw new RuntimeException("Hybrid Grace Hash Join: Hash table cannot be reloaded since it"
+          + " will be greater than memory limit. Recursive spilling is currently not supported");
     }
 
-    KeyValueHelper writeHelper = hybridHtContainer.getWriteHelper();
+    KeyValueHelper writeHelper = container.getWriteHelper();
     while (kvContainer.hasNext()) {
       ObjectPair<HiveKey, BytesWritable> pair = kvContainer.next();
       Writable key = pair.getFirst();
@@ -552,26 +577,26 @@ private void reloadHashTable(HashPartition partition,
       restoredHashMap.put(writeHelper, -1);
     }
 
-    hybridHtContainer.setTotalInMemRowCount(hybridHtContainer.getTotalInMemRowCount()
+    container.setTotalInMemRowCount(container.getTotalInMemRowCount()
         + restoredHashMap.getNumValues() + kvContainer.size());
     kvContainer.clear();
 
-    // Since there's only one hashmap to deal with, it's OK to create a MapJoinBytesTableContainer
-    currentSmallTable = new MapJoinBytesTableContainer(restoredHashMap);
-    currentSmallTable.setInternalValueOi(hybridHtContainer.getInternalValueOi());
-    currentSmallTable.setSortableSortOrders(hybridHtContainer.getSortableSortOrders());
+    spilledMapJoinTables[pos] = new MapJoinBytesTableContainer(restoredHashMap);
+    spilledMapJoinTables[pos].setInternalValueOi(container.getInternalValueOi());
+    spilledMapJoinTables[pos].setSortableSortOrders(container.getSortableSortOrders());
   }
 
   /**
    * Iterate over the big table row container and feed process() with leftover rows
-   * @param partition the hash partition being brought back to memory at the moment
+   * @param partitionId the partition from which to take out spilled big table rows
    * @throws HiveException
    */
-  protected void reProcessBigTable(HashPartition partition) throws HiveException {
+  protected void reProcessBigTable(int partitionId) throws HiveException {
+    HashPartition partition = biggestSmallTable.getHashPartitions()[partitionId];
     ObjectContainer bigTable = partition.getMatchfileObjContainer();
     while (bigTable.hasNext()) {
      Object row = bigTable.next();
-      process(row, tag);
+      process(row, alias);
    }
    bigTable.clear();
  }
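Note on the control flow established above: continueProcess(partitionId) now drives reprocessing by partition id across all small tables, rather than by a single HashPartition object. A minimal, self-contained toy model of that loop may make the shape clearer (plain Java collections; the class name NWayReplaySketch and all data in it are invented for illustration, this is not Hive code):

```java
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class NWayReplaySketch {
  public static void main(String[] args) {
    int numPartitions = 4;
    int numSmallTables = 2;

    // smallTables.get(t).get(p) stands in for partition p of small table t,
    // as it would look after reloadHashTable(pos, partitionId).
    List<Map<Integer, String>> smallTables = new ArrayList<Map<Integer, String>>();
    for (int t = 0; t < numSmallTables; t++) {
      Map<Integer, String> partitions = new HashMap<Integer, String>();
      for (int p = 0; p < numPartitions; p++) {
        partitions.put(p, "table" + t + "-partition" + p);
      }
      smallTables.add(partitions);
    }

    // Big-table rows spilled per partition id (the "matchfile" contents).
    Map<Integer, List<String>> spilledBigTableRows = new HashMap<Integer, List<String>>();
    spilledBigTableRows.put(1, Arrays.asList("row-a", "row-b"));
    spilledBigTableRows.put(3, Arrays.asList("row-c"));

    // One pass per partition id: reload the same id in ALL small tables
    // before replaying any big-table row, mirroring continueProcess().
    for (int p = 0; p < numPartitions; p++) {
      List<String> rows = spilledBigTableRows.get(p);
      if (rows == null) {
        continue; // this partition never spilled
      }
      for (int t = 0; t < numSmallTables; t++) {
        System.out.println("reload " + smallTables.get(t).get(p));
      }
      for (String row : rows) {
        System.out.println("replay " + row + " against partition " + p);
      }
    }
  }
}
```

The essential invariant is the one the patch's own comments state: for an n-way join a big-table row is spilled only once (the bigTableRowSpilled flag in process()), but at replay time partition id i must be reloaded for every small table before the row is fed back through process().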
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java
index dd5c621..2ba622e 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java
@@ -566,6 +566,7 @@ public void clear() {
     this.writeBuffers.clear();
     this.refs = new long[1];
     this.keysAssigned = 0;
+    this.numValues = 0;
   }
 
   public void expandAndRehashToTarget(int estimateNewRowCount) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableConf.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableConf.java
new file mode 100644
index 0000000..625038f
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableConf.java
@@ -0,0 +1,86 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.persistence;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * This conf class is a wrapper of a list of HybridHashTableContainers and some common info shared
+ * among them, which is used in n-way join (multiple small tables are involved).
+ */
+public class HybridHashTableConf {
+  private List<HybridHashTableContainer> loadedContainerList; // A list of already loaded containers
+  private int numberOfPartitions = 0;                         // Number of partitions each table should have
+  private int nextSpillPartition = -1;                        // The partition to be spilled next
+
+  public HybridHashTableConf() {
+    loadedContainerList = new ArrayList<HybridHashTableContainer>();
+  }
+
+  public int getNumberOfPartitions() {
+    return numberOfPartitions;
+  }
+
+  public void setNumberOfPartitions(int numberOfPartitions) {
+    this.numberOfPartitions = numberOfPartitions;
+    this.nextSpillPartition = numberOfPartitions - 1;
+  }
+
+  public int getNextSpillPartition() {
+    return this.nextSpillPartition;
+  }
+
+  public void setNextSpillPartition(int nextSpillPartition) {
+    this.nextSpillPartition = nextSpillPartition;
+  }
+
+
+  public List<HybridHashTableContainer> getLoadedContainerList() {
+    return loadedContainerList;
+  }
+
+  /**
+   * Spill one in-memory partition from tail for all previously loaded HybridHashTableContainers.
+   * Also mark that partition number as spill-on-creation for future created containers.
+   * @return amount of memory freed; 0 if only one last partition is in memory for each container
+   */
+  public long spill() throws IOException {
+    if (nextSpillPartition == 0) {
+      return 0;
+    }
+    long memFreed = 0;
+    for (HybridHashTableContainer container : loadedContainerList) {
+      memFreed += container.spillPartition(nextSpillPartition);
+      container.setSpill(true);
+    }
+    nextSpillPartition--;
+    return memFreed;
+  }
+
+  /**
+   * Check if a partition should be spilled directly on creation
+   * @param partitionId the partition to create
+   * @return true if it should be spilled directly, false otherwise
+   */
+  public boolean doSpillOnCreation(int partitionId) {
+    return nextSpillPartition != -1 && partitionId > nextSpillPartition;
+  }
+}
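Because HybridHashTableConf is new in this patch, a short usage sketch of its coordination protocol may help. It calls only the methods defined in the file above; the surrounding class name and the literal partition count of 16 are assumptions for illustration:

```java
import org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableConf;

public class NwayConfProtocolSketch {
  public static void main(String[] args) {
    HybridHashTableConf nwayConf = new HybridHashTableConf();

    // The first (biggest) small table decides the partition count for all
    // tables; this also initializes nextSpillPartition to the tail (15 here).
    nwayConf.setNumberOfPartitions(16);

    // Later small tables reuse the count, and ask per partition whether it
    // must be spilled directly on creation to mirror earlier spills.
    for (int i = 0; i < nwayConf.getNumberOfPartitions(); i++) {
      // False for every i while nextSpillPartition is still 15.
      boolean spillOnCreation = nwayConf.doSpillOnCreation(i);
      System.out.println("partition " + i + " spill-on-creation: " + spillOnCreation);
    }

    // Each spill() call (not shown here; it needs loaded containers) frees the
    // tail partition of every loaded container and decrements
    // nextSpillPartition, after which doSpillOnCreation(15) returns true.
    System.out.println("next partition to spill: " + nwayConf.getNextSpillPartition());
  }
}
```

The design choice is that per-table state stays in each HybridHashTableContainer, while only the partition count and the next-to-spill cursor are shared, so every loaded table always agrees on which partition ids are on disk.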
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
index 67477c2..d5c5f55 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
@@ -69,15 +69,20 @@ private static final Log LOG = LogFactory.getLog(HybridHashTableContainer.class);
 
   private final HashPartition[] hashPartitions; // an array of partitions holding the triplets
-  private int totalInMemRowCount = 0;  // total number of small table rows in memory
-  private final long memoryThreshold;  // the max memory limit allocated
+  private int totalInMemRowCount = 0;    // total number of small table rows in memory
+  private long memoryThreshold;          // the max memory limit that can be allocated
+  private long memoryUsed;               // the actual memory used
+  private int writeBufferSize;           // write buffer size for this HybridHashTableContainer
   private final long tableRowSize;       // row size of the small table
-  private boolean isSpilled;           // whether there's any spilled partition
-  private int toSpillPartitionId;      // the partition into which to spill the big table row;
-                                       // This may change after every setMapJoinKey call
-  private int numPartitionsSpilled;    // number of spilled partitions
-  private boolean lastPartitionInMem;  // only one (last one) partition is left in memory
+  private boolean isSpilled;             // whether there's any spilled partition
+  private int toSpillPartitionId;        // the partition into which to spill the big table row;
+                                         // This may change after every setMapJoinKey call
+  private int numPartitionsSpilled;      // number of spilled partitions
+  private boolean lastPartitionInMem;    // only one (last one) partition is left in memory
   private final int memoryCheckFrequency; // how often (# of rows apart) to check if memory is full
+  private final int minWbSize;           // the minimum write buffer size used in each partition
+  private final int minNumParts;         // the minimum number of partitions to be created
+  private HybridHashTableConf nwayConf;  // configuration for n-way join
 
   /** The OI used to deserialize values. We never deserialize keys. 
*/ private LazyBinaryStructObjectInspector internalValueOi; @@ -168,51 +173,64 @@ public boolean isHashMapOnDisk() { } } - public HybridHashTableContainer(Configuration hconf, long keyCount, long memUsage, long tableSize) - throws SerDeException { + public HybridHashTableContainer(Configuration hconf, long keyCount, long memoryAvailable, + long estimatedTableSize, HybridHashTableConf nwayConf) + throws SerDeException, IOException { this(HiveConf.getFloatVar(hconf, HiveConf.ConfVars.HIVEHASHTABLEKEYCOUNTADJUSTMENT), HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHASHTABLETHRESHOLD), HiveConf.getFloatVar(hconf, HiveConf.ConfVars.HIVEHASHTABLELOADFACTOR), - HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHASHTABLEWBSIZE), - HiveConf.getLongVar(hconf, HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD), HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMEMCHECKFREQ), - tableSize, keyCount, memUsage); + HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMINWBSIZE), + HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMINNUMPARTITIONS), + estimatedTableSize, keyCount, memoryAvailable, nwayConf); } - private HybridHashTableContainer(float keyCountAdj, int threshold, float loadFactor, int wbSize, - long noConditionalTaskThreshold, int memCheckFreq, long tableSize, - long keyCount, long memUsage) throws SerDeException { - - if (wbSize > noConditionalTaskThreshold) { - LOG.warn("adjusting hash table write buffer size to be smaller than noconditionaltasksize"); - wbSize = (int) noConditionalTaskThreshold; - } - + private HybridHashTableContainer(float keyCountAdj, int threshold, float loadFactor, + int memCheckFreq, int minWbSize, int minNumParts, + long estimatedTableSize, long keyCount, + long memoryAvailable, HybridHashTableConf nwayConf) + throws SerDeException, IOException { int newKeyCount = HashMapWrapper.calculateTableSize( keyCountAdj, threshold, loadFactor, keyCount); - - memoryThreshold = noConditionalTaskThreshold; - tableRowSize = tableSize / newKeyCount; + memoryThreshold = memoryAvailable; + tableRowSize = estimatedTableSize / keyCount; memoryCheckFrequency = memCheckFreq; - - int numPartitions = calcNumPartitions(tableSize, wbSize); // estimate # of partitions to create + this.minWbSize = minWbSize; + this.minNumParts = minNumParts; + this.nwayConf = nwayConf; + int numPartitions = calcNumPartitions(estimatedTableSize); // spilling previously loaded table may happen hashPartitions = new HashPartition[numPartitions]; int numPartitionsSpilledOnCreation = 0; - long memoryAllocated = 0; + memoryUsed = 0; int initialCapacity = Math.max(newKeyCount / numPartitions, threshold / numPartitions); + for (int i = 0; i < numPartitions; i++) { - if (i == 0) { // We unconditionally create a hashmap for the first hash partition - hashPartitions[i] = new HashPartition(initialCapacity, loadFactor, wbSize, memUsage, true); - } else { - hashPartitions[i] = new HashPartition(initialCapacity, loadFactor, wbSize, memUsage, - memoryAllocated + wbSize < memoryThreshold); + if (this.nwayConf == null || // binary join + nwayConf.getLoadedContainerList().size() == 0) { // n-way join, first (biggest) small table + if (i == 0) { // We unconditionally create a hashmap for the first hash partition + hashPartitions[i] = new HashPartition(initialCapacity, loadFactor, writeBufferSize, memoryThreshold, true); + } else { + hashPartitions[i] = new HashPartition(initialCapacity, loadFactor, writeBufferSize, memoryThreshold, + memoryUsed + writeBufferSize < 
memoryThreshold); + } + } else { // n-way join + // For all later small tables, follow the same pattern of the previously loaded tables. + if (this.nwayConf.doSpillOnCreation(i)) { + hashPartitions[i] = new HashPartition(threshold, loadFactor, writeBufferSize, memoryThreshold, false); + } else { + hashPartitions[i] = new HashPartition(threshold, loadFactor, writeBufferSize, memoryThreshold, true); + } } + if (isHashMapSpilledOnCreation(i)) { numPartitionsSpilledOnCreation++; numPartitionsSpilled++; this.setSpill(true); + if (this.nwayConf != null && this.nwayConf.getNextSpillPartition() == numPartitions - 1) { + this.nwayConf.setNextSpillPartition(i - 1); + } } else { - memoryAllocated += hashPartitions[i].hashMap.memorySize(); + memoryUsed += hashPartitions[i].hashMap.memorySize(); } } assert numPartitionsSpilledOnCreation != numPartitions : "All partitions are directly spilled!" + @@ -220,6 +238,11 @@ private HybridHashTableContainer(float keyCountAdj, int threshold, float loadFac LOG.info("Number of partitions created: " + numPartitions); LOG.info("Number of partitions spilled directly to disk on creation: " + numPartitionsSpilledOnCreation); + + // Append this container to the loaded list + if (this.nwayConf != null) { + this.nwayConf.getLoadedContainerList().add(this); + } } @@ -235,6 +258,20 @@ public long getMemoryThreshold() { return memoryThreshold; } + /** + * Get the current memory usage by recalculating it. + * @return current memory usage + */ + public long refreshMemoryUsed() { + long memUsed = 0; + for (HashPartition hp : hashPartitions) { + if (hp.hashMap != null) { + memUsed += hp.hashMap.memorySize(); + } + } + return memoryUsed = memUsed; + } + public LazyBinaryStructObjectInspector getInternalValueOi() { return internalValueOi; } @@ -292,10 +329,16 @@ public MapJoinKey putRow(MapJoinObjectSerDeContext keyContext, Writable currentK LOG.warn("This LAST partition in memory won't be spilled!"); lastPartitionInMem = true; } else { - int biggest = biggestPartition(); + if (nwayConf == null) { // binary join + int biggest = biggestPartition(); + spillPartition(biggest); + this.setSpill(true); + } else { // n-way join + LOG.info("N-way spilling: spill tail partition from previously loaded small tables"); + memoryThreshold += nwayConf.spill(); + LOG.info("Memory threshold has been increased to: " + memoryThreshold); + } numPartitionsSpilled++; - spillPartition(biggest); - this.setSpill(true); } } } @@ -328,13 +371,7 @@ public boolean isHashMapSpilledOnCreation(int partitionId) { * @return true if memory is full, false if not */ private boolean isMemoryFull() { - long size = 0; - for (int i = 0; i < hashPartitions.length; i++) { - if (!isOnDisk(i)) { - size += hashPartitions[i].hashMap.memorySize(); - } - } - return size >= memoryThreshold; + return refreshMemoryUsed() >= memoryThreshold; } /** @@ -364,11 +401,11 @@ private int biggestPartition() { /** * Move the hashtable of a specified partition from memory into local file system * @param partitionId the hashtable to be moved + * @return amount of memory freed */ - private void spillPartition(int partitionId) throws IOException { + public long spillPartition(int partitionId) throws IOException { HashPartition partition = hashPartitions[partitionId]; int inMemRowCount = partition.hashMap.getNumValues(); - long inMemSize = partition.hashMap.memorySize(); Path path = Files.createTempFile("partition-" + partitionId + "-", null); OutputStream outputStream = Files.newOutputStream(path); @@ -382,57 +419,75 @@ private void 
spillPartition(int partitionId) throws IOException {
     partition.hashMapLocalPath = path;
     partition.hashMapOnDisk = true;
 
-    long size = 0;
-    for (int i = 0; i < hashPartitions.length; i++) {
-      if (!isOnDisk(i)) {
-        size += hashPartitions[i].hashMap.memorySize();
-      }
-    }
     LOG.info("Spilling hash partition " + partitionId + " (Rows: " + inMemRowCount +
-        ", Mem size: " + inMemSize + "): " + path);
-    LOG.info("Memory usage before spilling: " + size);
-    LOG.info("Memory usage after spilling: " + (size - inMemSize));
+        ", Mem size: " + partition.hashMap.memorySize() + "): " + path);
+    LOG.info("Memory usage before spilling: " + memoryUsed);
+
+    long memFreed = partition.hashMap.memorySize();
+    memoryUsed -= memFreed;
+    LOG.info("Memory usage after spilling: " + memoryUsed);
 
     totalInMemRowCount -= inMemRowCount;
     partition.hashMap.clear();
+    return memFreed;
   }
 
   /**
-   * Calculate how many partitions are needed. This is an estimation.
+   * Calculate how many partitions are needed.
+   * For n-way join, we only do this calculation once, for the biggest small table (we process the
+   * biggest small table first). The other small tables will use the same number. They may need to
+   * adjust (usually reduce) their individual write buffer size in order not to exceed memory threshold.
    * @param dataSize total data size for the table
-   * @param wbSize write buffer size
    * @return number of partitions needed
    */
-  private int calcNumPartitions(long dataSize, int wbSize) {
-    if (memoryThreshold < wbSize) {
-      throw new IllegalStateException("Available memory is less than hashtable writebuffer size!"
-          + " Try increasing hive.auto.convert.join.noconditionaltask.size.");
-    }
+  private int calcNumPartitions(long dataSize) throws IOException {
+    int numPartitions = minNumParts;
 
-    int lowerLimit = 2;
-    int numPartitions = (int) Math.ceil(dataSize / wbSize);
+    if (nwayConf == null ||                      // binary join
+        nwayConf.getNumberOfPartitions() == 0) { // n-way join, first (biggest) small table
+      if (memoryThreshold < minNumParts * minWbSize) {
+        throw new RuntimeException("Available memory is not enough to create a HybridHashTableContainer!");
+      }
+      if (memoryThreshold < dataSize) {
+        while (dataSize / numPartitions > memoryThreshold) {
+          numPartitions *= 2;
+        }
+      }
+      writeBufferSize = (int)(dataSize / numPartitions);
+      if (nwayConf != null) {
+        nwayConf.setNumberOfPartitions(numPartitions);
+      }
+    } else {  // Just reuse the same number as calculated before for the biggest small table
+      numPartitions = nwayConf.getNumberOfPartitions();
+      LOG.info("Original total available memory: " + memoryThreshold);
+      while (memoryThreshold < numPartitions * minWbSize) {
+        // Spill previously loaded tables to make more room
+        long memFreed = nwayConf.spill();
+        if (memFreed == 0) {
+          throw new RuntimeException("Available memory is not enough to create"
+              + " HybridHashTableContainers consistently!");
+        } else {
+          LOG.info("Total available memory was: " + memoryThreshold);
+          memoryThreshold += memFreed;
+          LOG.info("Total available memory is: " + memoryThreshold);
+        }
+      }
+      writeBufferSize = (int) (memoryThreshold / numPartitions);
+    }
+    writeBufferSize = writeBufferSize < minWbSize ? minWbSize : writeBufferSize;
 
     LOG.info("Total available memory: " + memoryThreshold);
     LOG.info("Estimated small table size: " + dataSize);
-    LOG.info("Write buffer size: " + wbSize);
-    LOG.info("Initial number of partitions: " + numPartitions);
-
-    if (numPartitions < lowerLimit) {
-      return lowerLimit;
-    } else if (dataSize > memoryThreshold) {
-      numPartitions = (int) (memoryThreshold / wbSize);
-    }
-    // Make sure numPartitions is power of 2, to make N & (M - 1) easy when calculating partition No.
-    numPartitions = (Long.bitCount(numPartitions) == 1) ? numPartitions
-        : Integer.highestOneBit(numPartitions) << 1;
-    while (dataSize / numPartitions > memoryThreshold) {
-      numPartitions *= 2;
-    }
-
+    LOG.info("Write buffer size: " + writeBufferSize);
     LOG.info("Number of hash partitions to be created: " + numPartitions);
     return numPartitions;
   }
 
+  /* Get number of partitions */
+  public int getNumPartitions() {
+    return hashPartitions.length;
+  }
+
   /* Get total number of rows from all in memory partitions */
   public int getTotalInMemRowCount() {
     return totalInMemRowCount;
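The sizing rule in calcNumPartitions() above is compact, so a worked example may make it concrete. The numbers below are made up and do not come from the patch; the sketch mirrors only the binary-join branch. Note that starting from the default hive.mapjoin.hybridgrace.minnumpartitions of 16 and only ever doubling keeps the partition count a power of two, which the removed code had to enforce explicitly for the N & (M - 1) partition masking:

```java
public class PartitionSizingSketch {
  public static void main(String[] args) {
    long memoryThreshold = 4L * 1024 * 1024;   // 4 MB available for this table
    long dataSize = 100L * 1024 * 1024;        // 100 MB estimated small-table size
    int minNumParts = 16;                      // hive.mapjoin.hybridgrace.minnumpartitions
    int minWbSize = 512;                       // hive.mapjoin.hybridgrace.minwbsize

    int numPartitions = minNumParts;
    if (memoryThreshold < dataSize) {
      while (dataSize / numPartitions > memoryThreshold) {
        numPartitions *= 2;                    // 16 -> 32: 100MB/32 ~ 3.1MB <= 4MB
      }
    }
    int writeBufferSize = (int) (dataSize / numPartitions);
    writeBufferSize = Math.max(writeBufferSize, minWbSize);

    System.out.println(numPartitions);         // 32
    System.out.println(writeBufferSize);       // 3276800 bytes
  }
}
```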
@@ -465,6 +520,14 @@ public int getToSpillPartitionId() {
     return toSpillPartitionId;
   }
 
+  /**
+   * Gets the n-way join configuration
+   * @return n-way join conf
+   */
+  public HybridHashTableConf getNwayConf() {
+    return nwayConf;
+  }
+
   /* Clean up in memory hashtables */
   @Override
   public void clear() {
@@ -473,6 +536,7 @@ public void clear() {
         hp.hashMap.clear();
       }
     }
+    memoryUsed = 0;
   }
 
   @Override
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java
index 9034253..397f8e5 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java
@@ -29,6 +29,7 @@ import org.apache.hadoop.hive.ql.exec.MapredContext;
 import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext;
 import org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper;
+import org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableConf;
 import org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer;
 import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer;
 import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext;
@@ -79,10 +80,44 @@ public void load(MapJoinTableContainer[] mapJoinTables,
         hconf, HiveConf.ConfVars.HIVEMAPJOINUSEOPTIMIZEDTABLE);
     boolean useHybridGraceHashJoin = desc.isHybridHashJoin();
     boolean isFirstKey = true;
-
-    // Disable hybrid grace hash join for n-way join
-    if (mapJoinTables.length > 2) {
-      useHybridGraceHashJoin = false;
+    // TODO remove this after memory manager is in
+    long noConditionalTaskThreshold = HiveConf.getLongVar(
+        hconf, HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD);
+
+    // Only applicable to n-way Hybrid Grace Hash Join
+    HybridHashTableConf nwayConf = null;
+    long totalSize = 0;
+    if (useHybridGraceHashJoin && mapJoinTables.length > 2) {
+      // Create a Conf for n-way HybridHashTableContainers
+      nwayConf = new HybridHashTableConf();
+
+      // Find the biggest small table; also calculate total data size of all small tables
+      int biggest = 0;   // position of the biggest small table
+      long maxSize = 0;  // the size of the biggest small table
+      for (int pos = 0; pos < mapJoinTables.length; pos++) {
+        if (pos == desc.getPosBigTable()) {
+          continue;
+        }
+        totalSize += desc.getParentDataSizes().get(pos);
+        biggest = desc.getParentDataSizes().get(pos) > maxSize ? pos : biggest;
+        maxSize = desc.getParentDataSizes().get(pos) > maxSize ? desc.getParentDataSizes().get(pos)
+            : maxSize;
+      }
+      // We want to first process the biggest small table, based on which we calculate
+      // number of partitions, so we swap it with the first small table.
+      // Swap serde and data size too.
+      if (biggest != 0) {
+        int firstSmall = desc.getPosBigTable() == 0 ? 1 : 0;
+        MapJoinTableContainer tempContainer = mapJoinTables[biggest];
+        MapJoinTableContainerSerDe tempContainerSerDe = mapJoinTableSerdes[biggest];
+        long tempDataSize = desc.getParentDataSizes().get(biggest);
+        mapJoinTables[biggest] = mapJoinTables[firstSmall];
+        mapJoinTableSerdes[biggest] = mapJoinTableSerdes[firstSmall];
+        desc.getParentDataSizes().put(biggest, desc.getParentDataSizes().get(firstSmall));
+        mapJoinTables[firstSmall] = tempContainer;
+        mapJoinTableSerdes[firstSmall] = tempContainerSerDe;
+        desc.getParentDataSizes().put(firstSmall, tempDataSize);
+      }
     }
 
     for (int pos = 0; pos < mapJoinTables.length; pos++) {
@@ -120,9 +155,20 @@
       Long keyCountObj = parentKeyCounts.get(pos);
       long keyCount = (keyCountObj == null) ? -1 : keyCountObj.longValue();
 
+      long memory = 0;
+      if (useHybridGraceHashJoin) {
+        if (mapJoinTables.length > 2) {
+          // Allocate n-way join memory proportionally
+          float percentage = (float) desc.getParentDataSizes().get(pos) / totalSize;
+          memory = (long) (noConditionalTaskThreshold * percentage);
+        } else {  // binary join
+          memory = noConditionalTaskThreshold;
+        }
+      }
+
       MapJoinTableContainer tableContainer = useOptimizedTables
-          ? (useHybridGraceHashJoin ? new HybridHashTableContainer(hconf, keyCount, memUsage,
-              desc.getParentDataSizes().get(pos))
+          ? (useHybridGraceHashJoin ? new HybridHashTableContainer(hconf, keyCount,
+              memory, desc.getParentDataSizes().get(pos), nwayConf)
           : new MapJoinBytesTableContainer(hconf, valCtx, keyCount, memUsage))
           : new HashMapWrapper(hconf, keyCount);
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java
index bbc8d60..5a1705d 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java
@@ -308,8 +308,10 @@ protected void spillBigTableRow(MapJoinTableContainer hybridHtContainer, Object
   }
 
   @Override
-  protected void reProcessBigTable(HybridHashTableContainer.HashPartition partition)
+  protected void reProcessBigTable(int partitionId)
       throws HiveException {
+
+    HybridHashTableContainer.HashPartition partition = biggestSmallTable.getHashPartitions()[partitionId];
     ObjectContainer bigTable = partition.getMatchfileObjContainer();
     DataOutputBuffer dataOutputBuffer = new DataOutputBuffer();
diff --git ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q
index 096c890..e92504a 100644
--- ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q
+++ ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q
@@ -42,6 +42,8 @@ select * from dest2;
 
 set hive.auto.convert.join.noconditionaltask=true;
 set hive.auto.convert.join.noconditionaltask.size=200;
+set hive.mapjoin.hybridgrace.minwbsize=100;
+set hive.mapjoin.hybridgrace.minnumpartitions=2;
 
 -- A SMB join followed by a mutli-insert
 explain
diff --git ql/src/test/queries/clientpositive/hybridgrace_hashjoin_1.q ql/src/test/queries/clientpositive/hybridgrace_hashjoin_1.q
new file 
mode 100644 index 0000000..e4f0f85 --- /dev/null +++ ql/src/test/queries/clientpositive/hybridgrace_hashjoin_1.q @@ -0,0 +1,258 @@ +-- Hybrid Grace Hash Join +-- Test basic functionalities: +-- 1. Various cases when hash partitions spill +-- 2. Partitioned table spilling +-- 3. Vectorization + +SHOW DATABASES; + +set hive.auto.convert.join=true; +set hive.auto.convert.join.noconditionaltask.size=1300000; +set hive.mapjoin.optimized.hashtable.wbsize=880000; +set hive.mapjoin.hybridgrace.memcheckfrequency=1024; + +set hive.mapjoin.hybridgrace.hashtable=false; + +-- Base result for inner join +explain +select count(*) from +(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint + where c.cint < 2000000000) t1 +; + +select count(*) from +(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint + where c.cint < 2000000000) t1 +; + +set hive.mapjoin.hybridgrace.hashtable=true; + +-- Two partitions are created. One in memory, one on disk on creation. +-- The one in memory will eventually exceed memory limit, but won't spill. +explain +select count(*) from +(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint + where c.cint < 2000000000) t1 +; + +select count(*) from +(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint + where c.cint < 2000000000) t1 +; + +set hive.auto.convert.join.noconditionaltask.size=3000000; +set hive.mapjoin.optimized.hashtable.wbsize=100000; + +set hive.mapjoin.hybridgrace.hashtable=false; + +-- Base result for inner join +explain +select count(*) from +(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint) t1 +; + +select count(*) from +(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint) t1 +; + +set hive.mapjoin.hybridgrace.hashtable=true; + +-- 16 partitions are created: 3 in memory, 13 on disk on creation. +-- 1 partition is spilled during first round processing, which ends up having 2 in memory, 14 on disk +explain +select count(*) from +(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint) t1 +; + +select count(*) from +(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint) t1 +; + + + +set hive.mapjoin.hybridgrace.hashtable=false; + +-- Base result for outer join +explain +select count(*) from +(select c.ctinyint + from alltypesorc c + left outer join alltypesorc cd + on cd.cint = c.cint) t1 +; + +select count(*) from +(select c.ctinyint + from alltypesorc c + left outer join alltypesorc cd + on cd.cint = c.cint) t1 +; + +set hive.mapjoin.hybridgrace.hashtable=true; + +-- 32 partitions are created. 3 in memory, 29 on disk on creation. 
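The in-memory versus on-disk partition counts asserted in this test's comments (including for the outer-join queries that follow) come from the container's own bookkeeping. An illustrative sketch of how such a split could be recomputed, using only accessors this patch adds or already uses (getHashPartitions() and isHashMapOnDisk()); the helper class name is invented:

```java
import org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer;

/** Illustrative helper, not part of the patch. */
public final class SpillStatsSketch {
  private SpillStatsSketch() {}

  /** Returns how many hash partitions of the container currently live on disk. */
  public static int countOnDiskPartitions(HybridHashTableContainer container) {
    int onDisk = 0;
    for (HybridHashTableContainer.HashPartition p : container.getHashPartitions()) {
      if (p.isHashMapOnDisk()) {
        onDisk++;
      }
    }
    return onDisk;
  }
}
```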
+explain +select count(*) from +(select c.ctinyint + from alltypesorc c + left outer join alltypesorc cd + on cd.cint = c.cint) t1 +; + +select count(*) from +(select c.ctinyint + from alltypesorc c + left outer join alltypesorc cd + on cd.cint = c.cint) t1 +; + + +-- Partitioned table +create table parttbl (key string, value char(20)) partitioned by (dt char(10)); +insert overwrite table parttbl partition(dt='2000-01-01') + select * from src; +insert overwrite table parttbl partition(dt='2000-01-02') + select * from src1; + +set hive.auto.convert.join.noconditionaltask.size=30000000; +set hive.mapjoin.optimized.hashtable.wbsize=10000000; + +set hive.mapjoin.hybridgrace.hashtable=false; + +-- No spill, base result +explain +select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +; + +select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +; + +set hive.mapjoin.hybridgrace.hashtable=true; + +-- No spill, 2 partitions created in memory +explain +select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +; + +select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +; + + +set hive.auto.convert.join.noconditionaltask.size=20000; +set hive.mapjoin.optimized.hashtable.wbsize=10000; + +set hive.mapjoin.hybridgrace.hashtable=false; + +-- Spill case base result +explain +select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +; + +select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +; + +set hive.mapjoin.hybridgrace.hashtable=true; + +-- Spill case, one partition in memory, one spilled on creation +explain +select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +; + +select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +; + +drop table parttbl; + + +-- Test vectorization +-- Test case borrowed from vector_decimal_mapjoin.q +CREATE TABLE decimal_mapjoin STORED AS ORC AS + SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1, + CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2, + cint + FROM alltypesorc; + +SET hive.auto.convert.join=true; +SET hive.auto.convert.join.noconditionaltask=true; +SET hive.auto.convert.join.noconditionaltask.size=50000000; +set hive.mapjoin.optimized.hashtable.wbsize=10000; +SET hive.vectorized.execution.enabled=true; +set hive.mapjoin.hybridgrace.hashtable=false; + +EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 + FROM decimal_mapjoin l + JOIN decimal_mapjoin r ON l.cint = r.cint + WHERE l.cint = 6981; +SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 + FROM decimal_mapjoin l + JOIN decimal_mapjoin r ON l.cint = r.cint + WHERE l.cint = 6981; + +set hive.mapjoin.hybridgrace.hashtable=true; + +EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 + FROM decimal_mapjoin l + JOIN decimal_mapjoin r ON l.cint = r.cint + WHERE l.cint = 6981; +SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 + FROM decimal_mapjoin l + JOIN decimal_mapjoin r ON l.cint = r.cint + WHERE l.cint = 6981; + +DROP TABLE decimal_mapjoin; diff --git ql/src/test/queries/clientpositive/hybridgrace_hashjoin_2.q ql/src/test/queries/clientpositive/hybridgrace_hashjoin_2.q new file mode 100644 index 0000000..99ae02a --- /dev/null +++ 
ql/src/test/queries/clientpositive/hybridgrace_hashjoin_2.q @@ -0,0 +1,101 @@ +-- Hybrid Grace Hash Join +-- Test n-way join +SHOW DATABASES; + +set hive.auto.convert.join=true; +set hive.auto.convert.join.noconditionaltask=true; +set hive.cbo.enable=false; + + +set hive.mapjoin.hybridgrace.hashtable=true; + +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN srcpart w ON (x.key = w.key) +JOIN src y ON (y.key = x.key); + + +-- Test 3-way mapjoin (1 big table, 2 small tables) +SHOW DATABASES; + +set hive.mapjoin.hybridgrace.hashtable=false; + +EXPLAIN +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN src y ON (y.key = x.key); + +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN src y ON (y.key = x.key); + +set hive.mapjoin.hybridgrace.hashtable=true; + +EXPLAIN +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN src y ON (y.key = x.key); + +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN src y ON (y.key = x.key); + + +-- Test 4-way mapjoin (1 big table, 3 small tables) +SHOW DATABASES; + +set hive.mapjoin.hybridgrace.hashtable=false; + +EXPLAIN +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN srcpart w ON (x.key = w.key) +JOIN src y ON (y.key = x.key); + +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN srcpart w ON (x.key = w.key) +JOIN src y ON (y.key = x.key); + +set hive.mapjoin.hybridgrace.hashtable=true; + +EXPLAIN +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN srcpart w ON (x.key = w.key) +JOIN src y ON (y.key = x.key); + +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN srcpart w ON (x.key = w.key) +JOIN src y ON (y.key = x.key); + + +-- Borrowed from auto_join29.q +SHOW DATABASES; + +set hive.mapjoin.hybridgrace.hashtable=false; + +explain +SELECT * FROM src src1 +JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) +JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) +SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; + +SELECT * FROM src src1 +JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) +JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) +SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; + +set hive.mapjoin.hybridgrace.hashtable=true; + +explain +SELECT * FROM src src1 +JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) +JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) +SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; + +SELECT * FROM src src1 +JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) +JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) +SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/hybridhashjoin.q ql/src/test/queries/clientpositive/hybridhashjoin.q deleted file mode 100644 index fbd48ea..0000000 --- ql/src/test/queries/clientpositive/hybridhashjoin.q +++ /dev/null @@ -1,250 +0,0 @@ -set hive.auto.convert.join=true; -set hive.auto.convert.join.noconditionaltask.size=1300000; -set hive.mapjoin.optimized.hashtable.wbsize=880000; -set hive.mapjoin.hybridgrace.memcheckfrequency=1024; - -set hive.mapjoin.hybridgrace.hashtable=false; - --- Base result for inner join -explain -select count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint - where c.cint < 
2000000000) t1 -; - -select count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint - where c.cint < 2000000000) t1 -; - -set hive.mapjoin.hybridgrace.hashtable=true; - --- Two partitions are created. One in memory, one on disk on creation. --- The one in memory will eventually exceed memory limit, but won't spill. -explain -select count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint - where c.cint < 2000000000) t1 -; - -select count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint - where c.cint < 2000000000) t1 -; - -set hive.auto.convert.join.noconditionaltask.size=3000000; -set hive.mapjoin.optimized.hashtable.wbsize=100000; - -set hive.mapjoin.hybridgrace.hashtable=false; - --- Base result for inner join -explain -select count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint) t1 -; - -select count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint) t1 -; - -set hive.mapjoin.hybridgrace.hashtable=true; - --- 16 partitions are created: 3 in memory, 13 on disk on creation. --- 1 partition is spilled during first round processing, which ends up having 2 in memory, 14 on disk -explain -select count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint) t1 -; - -select count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint) t1 -; - - - -set hive.mapjoin.hybridgrace.hashtable=false; - --- Base result for outer join -explain -select count(*) from -(select c.ctinyint - from alltypesorc c - left outer join alltypesorc cd - on cd.cint = c.cint) t1 -; - -select count(*) from -(select c.ctinyint - from alltypesorc c - left outer join alltypesorc cd - on cd.cint = c.cint) t1 -; - -set hive.mapjoin.hybridgrace.hashtable=true; - --- 32 partitions are created. 3 in memory, 29 on disk on creation. 
-explain -select count(*) from -(select c.ctinyint - from alltypesorc c - left outer join alltypesorc cd - on cd.cint = c.cint) t1 -; - -select count(*) from -(select c.ctinyint - from alltypesorc c - left outer join alltypesorc cd - on cd.cint = c.cint) t1 -; - - --- Partitioned table -create table parttbl (key string, value char(20)) partitioned by (dt char(10)); -insert overwrite table parttbl partition(dt='2000-01-01') - select * from src; -insert overwrite table parttbl partition(dt='2000-01-02') - select * from src1; - -set hive.auto.convert.join.noconditionaltask.size=30000000; -set hive.mapjoin.optimized.hashtable.wbsize=10000000; - -set hive.mapjoin.hybridgrace.hashtable=false; - --- No spill, base result -explain -select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -; - -select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -; - -set hive.mapjoin.hybridgrace.hashtable=true; - --- No spill, 2 partitions created in memory -explain -select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -; - -select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -; - - -set hive.auto.convert.join.noconditionaltask.size=20000; -set hive.mapjoin.optimized.hashtable.wbsize=10000; - -set hive.mapjoin.hybridgrace.hashtable=false; - --- Spill case base result -explain -select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -; - -select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -; - -set hive.mapjoin.hybridgrace.hashtable=true; - --- Spill case, one partition in memory, one spilled on creation -explain -select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -; - -select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -; - -drop table parttbl; - - --- Test vectorization --- Test case borrowed from vector_decimal_mapjoin.q -CREATE TABLE decimal_mapjoin STORED AS ORC AS - SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1, - CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2, - cint - FROM alltypesorc; - -SET hive.auto.convert.join=true; -SET hive.auto.convert.join.noconditionaltask=true; -SET hive.auto.convert.join.noconditionaltask.size=50000000; -set hive.mapjoin.optimized.hashtable.wbsize=10000; -SET hive.vectorized.execution.enabled=true; -set hive.mapjoin.hybridgrace.hashtable=false; - -EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 - FROM decimal_mapjoin l - JOIN decimal_mapjoin r ON l.cint = r.cint - WHERE l.cint = 6981; -SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 - FROM decimal_mapjoin l - JOIN decimal_mapjoin r ON l.cint = r.cint - WHERE l.cint = 6981; - -set hive.mapjoin.hybridgrace.hashtable=true; - -EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 - FROM decimal_mapjoin l - JOIN decimal_mapjoin r ON l.cint = r.cint - WHERE l.cint = 6981; -SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 - FROM decimal_mapjoin l - JOIN decimal_mapjoin r ON l.cint = r.cint - WHERE l.cint = 6981; - -DROP TABLE decimal_mapjoin; diff --git ql/src/test/queries/clientpositive/tez_join_hash.q ql/src/test/queries/clientpositive/tez_join_hash.q index 3571cd5..67d89f8 100644 --- ql/src/test/queries/clientpositive/tez_join_hash.q +++ 
ql/src/test/queries/clientpositive/tez_join_hash.q @@ -14,6 +14,8 @@ SELECT count(*) FROM src, orc_src where src.key=orc_src.key; set hive.auto.convert.join=true; set hive.auto.convert.join.noconditionaltask=true; set hive.auto.convert.join.noconditionaltask.size=3000; +set hive.mapjoin.hybridgrace.minwbsize=350; +set hive.mapjoin.hybridgrace.minnumpartitions=8; explain select count(*) from (select x.key as key, y.value as value from diff --git ql/src/test/queries/clientpositive/tez_smb_main.q ql/src/test/queries/clientpositive/tez_smb_main.q index 404690f..452c4ad 100644 --- ql/src/test/queries/clientpositive/tez_smb_main.q +++ ql/src/test/queries/clientpositive/tez_smb_main.q @@ -41,6 +41,8 @@ select count(*) from tab a join tab_part b on a.key = b.key; set hive.auto.convert.join.noconditionaltask.size=2000; +set hive.mapjoin.hybridgrace.minwbsize=500; +set hive.mapjoin.hybridgrace.minnumpartitions=4; explain select count (*) from tab a join tab_part b on a.key = b.key; @@ -49,6 +51,8 @@ select count(*) from tab a join tab_part b on a.key = b.key; set hive.auto.convert.join.noconditionaltask.size=1000; +set hive.mapjoin.hybridgrace.minwbsize=250; +set hive.mapjoin.hybridgrace.minnumpartitions=4; explain select count (*) from tab a join tab_part b on a.key = b.key; @@ -57,6 +61,8 @@ select count(*) from tab a join tab_part b on a.key = b.key; set hive.auto.convert.join.noconditionaltask.size=500; +set hive.mapjoin.hybridgrace.minwbsize=125; +set hive.mapjoin.hybridgrace.minnumpartitions=4; explain select count(*) from tab a join tab_part b on a.key = b.key join src1 c on a.value = c.value; select count(*) from tab a join tab_part b on a.key = b.key join src1 c on a.value = c.value; diff --git ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out new file mode 100644 index 0000000..33a85bf --- /dev/null +++ ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out @@ -0,0 +1,1583 @@ +PREHOOK: query: -- Hybrid Grace Hash Join +-- Test basic functionalities: +-- 1. Various cases when hash partitions spill +-- 2. Partitioned table spilling +-- 3. Vectorization + +SHOW DATABASES +PREHOOK: type: SHOWDATABASES +POSTHOOK: query: -- Hybrid Grace Hash Join +-- Test basic functionalities: +-- 1. Various cases when hash partitions spill +-- 2. Partitioned table spilling +-- 3. 
Vectorization + +SHOW DATABASES +POSTHOOK: type: SHOWDATABASES +default +PREHOOK: query: -- Base result for inner join +explain +select count(*) from +(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint + where c.cint < 2000000000) t1 +PREHOOK: type: QUERY +POSTHOOK: query: -- Base result for inner join +explain +select count(*) from +(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint + where c.cint < 2000000000) t1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (cint < 2000000000) (type: boolean) + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + input vertices: + 1 Map 3 + Statistics: Num rows: 4505 Data size: 968719 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (cint < 2000000000) (type: boolean) + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from +(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint + where c.cint < 2000000000) t1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from +(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint + where c.cint < 2000000000) t1 +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@alltypesorc +#### A masked pattern was here #### +3152013 +PREHOOK: query: -- Two partitions are created. One in memory, one on disk on creation. +-- The one in memory will eventually exceed memory limit, but won't spill. +explain +select count(*) from +(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint + where c.cint < 2000000000) t1 +PREHOOK: type: QUERY +POSTHOOK: query: -- Two partitions are created. One in memory, one on disk on creation. +-- The one in memory will eventually exceed memory limit, but won't spill. +explain +select count(*) from +(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint + where c.cint < 2000000000) t1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (cint < 2000000000) (type: boolean) + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + input vertices: + 1 Map 3 + Statistics: Num rows: 4505 Data size: 968719 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (cint < 2000000000) (type: boolean) + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from +(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint + where c.cint < 2000000000) t1 +PREHOOK: type: QUERY +PREHOOK: Input: 
default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from +(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint + where c.cint < 2000000000) t1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +3152013 +PREHOOK: query: -- Base result for inner join +explain +select count(*) from +(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint) t1 +PREHOOK: type: QUERY +POSTHOOK: query: -- Base result for inner join +explain +select count(*) from +(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint) t1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: cint is not null (type: boolean) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + input vertices: + 1 Map 3 + Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: cint is not null (type: boolean) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from +(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint) t1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from 
+(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint) t1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +3152013 +PREHOOK: query: -- 16 partitions are created: 3 in memory, 13 on disk on creation. +-- 1 partition is spilled during first round processing, which ends up having 2 in memory, 14 on disk +explain +select count(*) from +(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint) t1 +PREHOOK: type: QUERY +POSTHOOK: query: -- 16 partitions are created: 3 in memory, 13 on disk on creation. +-- 1 partition is spilled during first round processing, which ends up having 2 in memory, 14 on disk +explain +select count(*) from +(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint) t1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: cint is not null (type: boolean) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + input vertices: + 1 Map 3 + Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: cint is not null (type: boolean) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from +(select c.ctinyint + from alltypesorc c + inner 
join alltypesorc cd + on cd.cint = c.cint) t1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from +(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint) t1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +3152013 +PREHOOK: query: -- Base result for outer join +explain +select count(*) from +(select c.ctinyint + from alltypesorc c + left outer join alltypesorc cd + on cd.cint = c.cint) t1 +PREHOOK: type: QUERY +POSTHOOK: query: -- Base result for outer join +explain +select count(*) from +(select c.ctinyint + from alltypesorc c + left outer join alltypesorc cd + on cd.cint = c.cint) t1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + input vertices: + 1 Map 3 + Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from +(select c.ctinyint + from alltypesorc c + left outer join alltypesorc cd + on cd.cint = c.cint) t1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from +(select c.ctinyint + from alltypesorc c + left outer join alltypesorc cd + on cd.cint = c.cint) t1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +3155128 +PREHOOK: query: -- 32 
partitions are created. 3 in memory, 29 on disk on creation. +explain +select count(*) from +(select c.ctinyint + from alltypesorc c + left outer join alltypesorc cd + on cd.cint = c.cint) t1 +PREHOOK: type: QUERY +POSTHOOK: query: -- 32 partitions are created. 3 in memory, 29 on disk on creation. +explain +select count(*) from +(select c.ctinyint + from alltypesorc c + left outer join alltypesorc cd + on cd.cint = c.cint) t1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + input vertices: + 1 Map 3 + Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from +(select c.ctinyint + from alltypesorc c + left outer join alltypesorc cd + on cd.cint = c.cint) t1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from +(select c.ctinyint + from alltypesorc c + left outer join alltypesorc cd + on cd.cint = c.cint) t1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +3155128 +PREHOOK: query: -- Partitioned table +create table parttbl (key string, value char(20)) partitioned by (dt char(10)) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parttbl +POSTHOOK: query: -- Partitioned table +create table parttbl (key string, value char(20)) partitioned by (dt 
char(10)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parttbl +PREHOOK: query: insert overwrite table parttbl partition(dt='2000-01-01') + select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@parttbl@dt=2000-01-01 +POSTHOOK: query: insert overwrite table parttbl partition(dt='2000-01-01') + select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@parttbl@dt=2000-01-01 +POSTHOOK: Lineage: parttbl PARTITION(dt=2000-01-01).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: parttbl PARTITION(dt=2000-01-01).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table parttbl partition(dt='2000-01-02') + select * from src1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +PREHOOK: Output: default@parttbl@dt=2000-01-02 +POSTHOOK: query: insert overwrite table parttbl partition(dt='2000-01-02') + select * from src1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@parttbl@dt=2000-01-02 +POSTHOOK: Lineage: parttbl PARTITION(dt=2000-01-02).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: parttbl PARTITION(dt=2000-01-02).value EXPRESSION [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- No spill, base result +explain +select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +PREHOOK: type: QUERY +POSTHOOK: query: -- No spill, base result +explain +select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: string) + 1 key (type: string) + input vertices: + 1 Map 3 + Statistics: Num rows: 289 Data size: 6872 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Map 3 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data 
size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +PREHOOK: type: QUERY +PREHOOK: Input: default@parttbl +PREHOOK: Input: default@parttbl@dt=2000-01-01 +PREHOOK: Input: default@parttbl@dt=2000-01-02 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parttbl +POSTHOOK: Input: default@parttbl@dt=2000-01-01 +POSTHOOK: Input: default@parttbl@dt=2000-01-02 +#### A masked pattern was here #### +1217 +PREHOOK: query: -- No spill, 2 partitions created in memory +explain +select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +PREHOOK: type: QUERY +POSTHOOK: query: -- No spill, 2 partitions created in memory +explain +select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: string) + 1 key (type: string) + input vertices: + 1 Map 3 + Statistics: Num rows: 289 Data size: 6872 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Map 3 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +PREHOOK: type: QUERY +PREHOOK: Input: default@parttbl +PREHOOK: Input: default@parttbl@dt=2000-01-01 +PREHOOK: Input: default@parttbl@dt=2000-01-02 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parttbl +POSTHOOK: Input: default@parttbl@dt=2000-01-01 +POSTHOOK: Input: default@parttbl@dt=2000-01-02 +#### A masked pattern was here #### +1217 +PREHOOK: query: -- Spill case base result +explain +select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +PREHOOK: type: QUERY +POSTHOOK: query: -- Spill case base result +explain +select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: string) + 1 key (type: string) + input vertices: + 1 Map 3 + Statistics: Num rows: 289 Data size: 6872 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Map 3 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +PREHOOK: type: QUERY 
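The spill cases in this golden file depend on making the hash table artificially small: elsewhere in the patch (tez_join_hash.q and tez_smb_main.q above) hive.mapjoin.hybridgrace.minwbsize is lowered to values like 350, 250, or 125 and hive.mapjoin.hybridgrace.minnumpartitions to 8 or 4, alongside small noconditionaltask sizes, and the cases here presumably rely on similarly tight settings so that even a tiny build side such as parttbl (Data size 12474 in the plans) splits into several hash partitions, some of which spill at creation time. As a rough sketch of how such minimums could interact with a memory budget when picking a partition count and a per-partition write-buffer size -- hypothetical names and formulas for illustration, not the actual HybridHashTableContainer logic:

    import static java.lang.Math.max;

    public class PartitionSizingSketch {
        // Round n up to the next power of two; keeping partition counts
        // power-of-two lets a partition be chosen from low bits of the hash.
        static int nextPowerOfTwo(int n) {
            int p = 1;
            while (p < n) {
                p <<= 1;
            }
            return p;
        }

        // Pick enough partitions that a single partition's hash table fits
        // the memory budget, but never fewer than the configured minimum.
        static int choosePartitionCount(long estimatedBuildBytes, long memoryBudgetBytes,
                                        int minNumPartitions) {
            long needed = (estimatedBuildBytes + memoryBudgetBytes - 1) / memoryBudgetBytes;
            return nextPowerOfTwo((int) max(needed, minNumPartitions));
        }

        // Split a default write-buffer budget across partitions, respecting the floor.
        static int chooseWriteBufferSize(int defaultWbBytes, int numPartitions, int minWbBytes) {
            return max(minWbBytes, defaultWbBytes / numPartitions);
        }

        public static void main(String[] args) {
            // Numbers in the spirit of the tests: a ~12 KB build side, a 1 KB
            // budget, and the minnumpartitions=4 / minwbsize=125 settings from
            // tez_smb_main.q. The 64 KB default buffer is made up for the demo.
            int parts = choosePartitionCount(12_474L, 1_024L, 4);
            int wb = chooseWriteBufferSize(64 * 1024, parts, 125);
            System.out.println("partitions=" + parts + " writeBufferBytes=" + wb); // partitions=16 writeBufferBytes=4096
        }
    }

Forcing many small partitions this way makes the spill path deterministic on a unit-test-sized dataset, which is presumably why each query here is run twice -- once as a base result and once with the hybrid container enabled -- so the two row counts can be compared directly.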
+PREHOOK: Input: default@parttbl +PREHOOK: Input: default@parttbl@dt=2000-01-01 +PREHOOK: Input: default@parttbl@dt=2000-01-02 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parttbl +POSTHOOK: Input: default@parttbl@dt=2000-01-01 +POSTHOOK: Input: default@parttbl@dt=2000-01-02 +#### A masked pattern was here #### +1217 +PREHOOK: query: -- Spill case, one partition in memory, one spilled on creation +explain +select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +PREHOOK: type: QUERY +POSTHOOK: query: -- Spill case, one partition in memory, one spilled on creation +explain +select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: string) + 1 key (type: string) + input vertices: + 1 Map 3 + Statistics: Num rows: 289 Data size: 6872 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Map 3 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +PREHOOK: type: QUERY +PREHOOK: Input: default@parttbl +PREHOOK: Input: default@parttbl@dt=2000-01-01 +PREHOOK: Input: default@parttbl@dt=2000-01-02 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from +(select p1.value + from parttbl p1 + inner 
join parttbl p2 + on p1.key = p2.key) t1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parttbl +POSTHOOK: Input: default@parttbl@dt=2000-01-01 +POSTHOOK: Input: default@parttbl@dt=2000-01-02 +#### A masked pattern was here #### +1217 +PREHOOK: query: drop table parttbl +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parttbl +PREHOOK: Output: default@parttbl +POSTHOOK: query: drop table parttbl +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parttbl +POSTHOOK: Output: default@parttbl +PREHOOK: query: -- Test vectorization +-- Test case borrowed from vector_decimal_mapjoin.q +CREATE TABLE decimal_mapjoin STORED AS ORC AS + SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1, + CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2, + cint + FROM alltypesorc +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: database:default +PREHOOK: Output: default@decimal_mapjoin +POSTHOOK: query: -- Test vectorization +-- Test case borrowed from vector_decimal_mapjoin.q +CREATE TABLE decimal_mapjoin STORED AS ORC AS + SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1, + CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2, + cint + FROM alltypesorc +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: database:default +POSTHOOK: Output: default@decimal_mapjoin +PREHOOK: query: EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 + FROM decimal_mapjoin l + JOIN decimal_mapjoin r ON l.cint = r.cint + WHERE l.cint = 6981 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 + FROM decimal_mapjoin l + JOIN decimal_mapjoin r ON l.cint = r.cint + WHERE l.cint = 6981 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: l + Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (cint = 6981) (type: boolean) + Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 6981 (type: int) + 1 6981 (type: int) + outputColumnNames: _col1, _col9 + input vertices: + 1 Map 2 + Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 6981 (type: int), 6981 (type: int), _col1 (type: decimal(20,10)), _col9 (type: decimal(23,14)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map 2 + Map Operator Tree: + TableScan + alias: r + Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (cint = 6981) (type: boolean) + Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: 6981 (type: int) + sort order: + + 
Map-reduce partition columns: 6981 (type: int) + Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE + value expressions: cdecimal2 (type: decimal(23,14)) + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 + FROM decimal_mapjoin l + JOIN decimal_mapjoin r ON l.cint = r.cint + WHERE l.cint = 6981 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_mapjoin +#### A masked pattern was here #### +POSTHOOK: query: SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 + FROM decimal_mapjoin l + JOIN decimal_mapjoin r ON l.cint = r.cint + WHERE l.cint = 6981 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_mapjoin +#### A masked pattern was here #### +6981 6981 NULL NULL +6981 6981 NULL -617.5607769230769 +6981 6981 NULL -617.5607769230769 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL 6984454.211097692 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL -617.5607769230769 +6981 6981 NULL -617.5607769230769 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL 6984454.211097692 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL -617.5607769230769 +6981 6981 NULL -617.5607769230769 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL 6984454.211097692 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL -617.5607769230769 +6981 6981 NULL -617.5607769230769 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL 6984454.211097692 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 5831542.269248378 NULL +6981 6981 5831542.269248378 -617.5607769230769 +6981 6981 5831542.269248378 -617.5607769230769 +6981 6981 5831542.269248378 NULL +6981 6981 5831542.269248378 NULL +6981 6981 5831542.269248378 NULL +6981 6981 5831542.269248378 6984454.211097692 +6981 6981 5831542.269248378 NULL +6981 6981 5831542.269248378 NULL +6981 6981 5831542.269248378 NULL +6981 6981 NULL NULL +6981 6981 NULL -617.5607769230769 +6981 6981 NULL -617.5607769230769 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL 6984454.211097692 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL -617.5607769230769 +6981 6981 NULL -617.5607769230769 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL 6984454.211097692 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL -617.5607769230769 +6981 6981 NULL -617.5607769230769 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL 6984454.211097692 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 -515.621072973 NULL +6981 6981 -515.621072973 -617.5607769230769 +6981 6981 -515.621072973 -617.5607769230769 +6981 6981 -515.621072973 NULL +6981 6981 -515.621072973 NULL +6981 6981 -515.621072973 NULL +6981 6981 -515.621072973 6984454.211097692 +6981 6981 -515.621072973 NULL +6981 6981 -515.621072973 NULL +6981 6981 -515.621072973 NULL +6981 6981 -515.621072973 NULL +6981 6981 -515.621072973 -617.5607769230769 +6981 6981 -515.621072973 -617.5607769230769 +6981 6981 -515.621072973 NULL +6981 6981 -515.621072973 NULL +6981 6981 -515.621072973 NULL +6981 6981 -515.621072973 
6984454.211097692 +6981 6981 -515.621072973 NULL +6981 6981 -515.621072973 NULL +6981 6981 -515.621072973 NULL +PREHOOK: query: EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 + FROM decimal_mapjoin l + JOIN decimal_mapjoin r ON l.cint = r.cint + WHERE l.cint = 6981 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 + FROM decimal_mapjoin l + JOIN decimal_mapjoin r ON l.cint = r.cint + WHERE l.cint = 6981 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: l + Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (cint = 6981) (type: boolean) + Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 6981 (type: int) + 1 6981 (type: int) + outputColumnNames: _col1, _col9 + input vertices: + 1 Map 2 + Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: 6981 (type: int), 6981 (type: int), _col1 (type: decimal(20,10)), _col9 (type: decimal(23,14)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map 2 + Map Operator Tree: + TableScan + alias: r + Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (cint = 6981) (type: boolean) + Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: 6981 (type: int) + sort order: + + Map-reduce partition columns: 6981 (type: int) + Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE + value expressions: cdecimal2 (type: decimal(23,14)) + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 + FROM decimal_mapjoin l + JOIN decimal_mapjoin r ON l.cint = r.cint + WHERE l.cint = 6981 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_mapjoin +#### A masked pattern was here #### +POSTHOOK: query: SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 + FROM decimal_mapjoin l + JOIN decimal_mapjoin r ON l.cint = r.cint + WHERE l.cint = 6981 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_mapjoin +#### A masked pattern was here #### +6981 6981 NULL NULL +6981 6981 NULL -617.5607769230769 +6981 6981 NULL -617.5607769230769 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL 6984454.211097692 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL -617.5607769230769 +6981 6981 NULL -617.5607769230769 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL 6984454.211097692 +6981 6981 NULL NULL 
+6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL -617.5607769230769 +6981 6981 NULL -617.5607769230769 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL 6984454.211097692 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL -617.5607769230769 +6981 6981 NULL -617.5607769230769 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL 6984454.211097692 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 5831542.269248378 NULL +6981 6981 5831542.269248378 -617.5607769230769 +6981 6981 5831542.269248378 -617.5607769230769 +6981 6981 5831542.269248378 NULL +6981 6981 5831542.269248378 NULL +6981 6981 5831542.269248378 NULL +6981 6981 5831542.269248378 6984454.211097692 +6981 6981 5831542.269248378 NULL +6981 6981 5831542.269248378 NULL +6981 6981 5831542.269248378 NULL +6981 6981 NULL NULL +6981 6981 NULL -617.5607769230769 +6981 6981 NULL -617.5607769230769 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL 6984454.211097692 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL -617.5607769230769 +6981 6981 NULL -617.5607769230769 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL 6984454.211097692 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL -617.5607769230769 +6981 6981 NULL -617.5607769230769 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL 6984454.211097692 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 -515.621072973 NULL +6981 6981 -515.621072973 -617.5607769230769 +6981 6981 -515.621072973 -617.5607769230769 +6981 6981 -515.621072973 NULL +6981 6981 -515.621072973 NULL +6981 6981 -515.621072973 NULL +6981 6981 -515.621072973 6984454.211097692 +6981 6981 -515.621072973 NULL +6981 6981 -515.621072973 NULL +6981 6981 -515.621072973 NULL +6981 6981 -515.621072973 NULL +6981 6981 -515.621072973 -617.5607769230769 +6981 6981 -515.621072973 -617.5607769230769 +6981 6981 -515.621072973 NULL +6981 6981 -515.621072973 NULL +6981 6981 -515.621072973 NULL +6981 6981 -515.621072973 6984454.211097692 +6981 6981 -515.621072973 NULL +6981 6981 -515.621072973 NULL +6981 6981 -515.621072973 NULL +PREHOOK: query: DROP TABLE decimal_mapjoin +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@decimal_mapjoin +PREHOOK: Output: default@decimal_mapjoin +POSTHOOK: query: DROP TABLE decimal_mapjoin +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@decimal_mapjoin +POSTHOOK: Output: default@decimal_mapjoin diff --git ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_2.q.out ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_2.q.out new file mode 100644 index 0000000..412b39b --- /dev/null +++ ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_2.q.out @@ -0,0 +1,822 @@ +PREHOOK: query: -- Hybrid Grace Hash Join +-- Test n-way join +SHOW DATABASES +PREHOOK: type: SHOWDATABASES +POSTHOOK: query: -- Hybrid Grace Hash Join +-- Test n-way join +SHOW DATABASES +POSTHOOK: type: SHOWDATABASES +default +PREHOOK: query: SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN srcpart w ON (x.key = w.key) +JOIN src y ON (y.key = x.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: 
default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN srcpart w ON (x.key = w.key) +JOIN src y ON (y.key = x.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +5680 +PREHOOK: query: -- Test 3-way mapjoin (1 big table, 2 small tables) +SHOW DATABASES +PREHOOK: type: SHOWDATABASES +POSTHOOK: query: -- Test 3-way mapjoin (1 big table, 2 small tables) +SHOW DATABASES +POSTHOOK: type: SHOWDATABASES +default +PREHOOK: query: EXPLAIN +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN src y ON (y.key = x.key) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN src y ON (y.key = x.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Map 2 + Map Operator Tree: + TableScan + alias: z + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + input vertices: + 0 Map 1 + 2 Map 4 + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Map 4 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 
Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN src y ON (y.key = x.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN src y ON (y.key = x.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +428 +PREHOOK: query: EXPLAIN +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN src y ON (y.key = x.key) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN src y ON (y.key = x.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Map 2 + Map Operator Tree: + TableScan + alias: z + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + input vertices: + 0 Map 1 + 2 Map 4 + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Map 4 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + 
predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN src y ON (y.key = x.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN src y ON (y.key = x.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +428 +PREHOOK: query: -- Test 4-way mapjoin (1 big table, 3 small tables) +SHOW DATABASES +PREHOOK: type: SHOWDATABASES +POSTHOOK: query: -- Test 4-way mapjoin (1 big table, 3 small tables) +SHOW DATABASES +POSTHOOK: type: SHOWDATABASES +default +PREHOOK: query: EXPLAIN +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN srcpart w ON (x.key = w.key) +JOIN src y ON (y.key = x.key) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN srcpart w ON (x.key = w.key) +JOIN src y ON (y.key = x.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Map 2 + Map Operator Tree: + TableScan + alias: z + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1000 Data 
size: 10624 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + Inner Join 0 to 3 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + 3 key (type: string) + input vertices: + 0 Map 1 + 2 Map 4 + 3 Map 5 + Statistics: Num rows: 3300 Data size: 35059 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Map 4 + Map Operator Tree: + TableScan + alias: w + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN srcpart w ON (x.key = w.key) +JOIN src y ON (y.key = x.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN srcpart w ON (x.key = w.key) +JOIN src y ON (y.key = x.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +5680 +PREHOOK: query: EXPLAIN +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN srcpart w ON (x.key = w.key) +JOIN src y ON (y.key = x.key) +PREHOOK: type: QUERY 
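In the 3-way and 4-way plans above, one vertex (Map 2, scanning srcpart z) receives all the broadcast edges, loads the small sides x, w, and y into hash tables, and probes every one of them for each streamed row ("Inner Join 0 to 1 / 0 to 2 / 0 to 3" on the shared key). For a COUNT(*) over such an n-way inner equi-join, each big-table row contributes the product of its match counts across the small sides, and a miss on any side drops the row -- consistent with the count growing from 428 in the 3-way case to 5680 once srcpart w is joined in as well. A minimal, self-contained sketch of that probe loop (toy data and names; Hive's operator works on spillable hash table containers, not plain maps):

    import java.util.List;
    import java.util.Map;

    public class NWayMapJoinSketch {
        // Count the rows of an n-way inner equi-join on one shared key:
        // stream the big side and, per row, multiply the match counts found
        // in each broadcast side's hash table; any miss discards the row.
        static long countJoin(List<String> bigSide, List<Map<String, Integer>> smallSides) {
            long total = 0;
            for (String key : bigSide) {
                long matches = 1;
                for (Map<String, Integer> table : smallSides) {
                    Integer m = table.get(key); // probe one loaded hash table
                    if (m == null) {
                        matches = 0;            // inner join: every side must match
                        break;
                    }
                    matches *= m;
                }
                total += matches;
            }
            return total;
        }

        public static void main(String[] args) {
            // Toy stand-ins for the broadcast inputs x, w, y: key -> row count.
            Map<String, Integer> x = Map.of("128", 1, "146", 1);
            Map<String, Integer> w = Map.of("128", 3, "146", 2);
            Map<String, Integer> y = Map.of("128", 3, "146", 2, "150", 1);
            // Toy streamed side z (keys only).
            List<String> z = List.of("128", "128", "146", "150");

            System.out.println(countJoin(z, List.of(x, w, y))); // 9 + 9 + 4 + 0 = 22
        }
    }

Under the hybrid container (the runs flagged "HybridGraceHashJoin: true"), a probe can also find that the partition holding that key was spilled to disk; the streamed row is then saved alongside the spilled partition and the pair is joined in a later pass rather than answered in place.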
+POSTHOOK: query: EXPLAIN +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN srcpart w ON (x.key = w.key) +JOIN src y ON (y.key = x.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Map 2 + Map Operator Tree: + TableScan + alias: z + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + Inner Join 0 to 3 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + 3 key (type: string) + input vertices: + 0 Map 1 + 2 Map 4 + 3 Map 5 + Statistics: Num rows: 3300 Data size: 35059 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Map 4 + Map Operator Tree: + TableScan + alias: w + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor 
Tree: + ListSink + +PREHOOK: query: SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN srcpart w ON (x.key = w.key) +JOIN src y ON (y.key = x.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN srcpart w ON (x.key = w.key) +JOIN src y ON (y.key = x.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +5680 +PREHOOK: query: -- Borrowed from auto_join29.q +SHOW DATABASES +PREHOOK: type: SHOWDATABASES +POSTHOOK: query: -- Borrowed from auto_join29.q +SHOW DATABASES +POSTHOOK: type: SHOWDATABASES +default +PREHOOK: query: explain +SELECT * FROM src src1 +JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) +JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) +SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT * FROM src src1 +JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) +JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) +SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 10) and (key < 10)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 1 Map 3 + 2 Map 4 + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + Map 3 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 10) and (key > 10)) (type: boolean) + Statistics: Num 
rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 10) and (key < 10)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * FROM src src1 +JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) +JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) +SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM src src1 +JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) +JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) +SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +PREHOOK: query: explain +SELECT * FROM src src1 +JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) +JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) +SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT * FROM src src1 +JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) +JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) +SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 10) and (key < 10)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + 
Inner Join 0 to 1 + Inner Join 1 to 2 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 1 Map 3 + 2 Map 4 + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + Map 3 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 10) and (key > 10)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 10) and (key < 10)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * FROM src src1 +JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) +JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) +SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM src src1 +JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) +JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) +SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### diff --git 
ql/src/test/results/clientpositive/tez/hybridhashjoin.q.out ql/src/test/results/clientpositive/tez/hybridhashjoin.q.out deleted file mode 100644 index 5d5b850..0000000 --- ql/src/test/results/clientpositive/tez/hybridhashjoin.q.out +++ /dev/null @@ -1,1566 +0,0 @@ -PREHOOK: query: -- Base result for inner join -explain -select count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint - where c.cint < 2000000000) t1 -PREHOOK: type: QUERY -POSTHOOK: query: -- Base result for inner join -explain -select count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint - where c.cint < 2000000000) t1 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (cint < 2000000000) (type: boolean) - Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 3 - Statistics: Num rows: 4505 Data size: 968719 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Map 3 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (cint < 2000000000) (type: boolean) - Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint - where c.cint < 2000000000) t1 -PREHOOK: type: QUERY -PREHOOK: Input: default@alltypesorc -#### A masked pattern was here #### -POSTHOOK: query: 
select count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint - where c.cint < 2000000000) t1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@alltypesorc -#### A masked pattern was here #### -3152013 -PREHOOK: query: -- Two partitions are created. One in memory, one on disk on creation. --- The one in memory will eventually exceed memory limit, but won't spill. -explain -select count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint - where c.cint < 2000000000) t1 -PREHOOK: type: QUERY -POSTHOOK: query: -- Two partitions are created. One in memory, one on disk on creation. --- The one in memory will eventually exceed memory limit, but won't spill. -explain -select count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint - where c.cint < 2000000000) t1 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (cint < 2000000000) (type: boolean) - Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 3 - Statistics: Num rows: 4505 Data size: 968719 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Map 3 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (cint < 2000000000) (type: boolean) - Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select 
count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint - where c.cint < 2000000000) t1 -PREHOOK: type: QUERY -PREHOOK: Input: default@alltypesorc -#### A masked pattern was here #### -POSTHOOK: query: select count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint - where c.cint < 2000000000) t1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@alltypesorc -#### A masked pattern was here #### -3152013 -PREHOOK: query: -- Base result for inner join -explain -select count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint) t1 -PREHOOK: type: QUERY -POSTHOOK: query: -- Base result for inner join -explain -select count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint) t1 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: cint is not null (type: boolean) - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 3 - Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Map 3 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: cint is not null (type: boolean) - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select count(*) from -(select c.ctinyint - from alltypesorc c - inner join 
alltypesorc cd - on cd.cint = c.cint) t1 -PREHOOK: type: QUERY -PREHOOK: Input: default@alltypesorc -#### A masked pattern was here #### -POSTHOOK: query: select count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint) t1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@alltypesorc -#### A masked pattern was here #### -3152013 -PREHOOK: query: -- 16 partitions are created: 3 in memory, 13 on disk on creation. --- 1 partition is spilled during first round processing, which ends up having 2 in memory, 14 on disk -explain -select count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint) t1 -PREHOOK: type: QUERY -POSTHOOK: query: -- 16 partitions are created: 3 in memory, 13 on disk on creation. --- 1 partition is spilled during first round processing, which ends up having 2 in memory, 14 on disk -explain -select count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint) t1 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: cint is not null (type: boolean) - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 3 - Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Map 3 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: cint is not null (type: boolean) - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint) t1 -PREHOOK: type: QUERY -PREHOOK: Input: default@alltypesorc -#### A masked pattern was here #### -POSTHOOK: query: select count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint) t1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@alltypesorc -#### A masked pattern was here #### -3152013 -PREHOOK: query: -- Base result for outer join -explain -select count(*) from -(select c.ctinyint - from alltypesorc c - left outer join alltypesorc cd - on cd.cint = c.cint) t1 -PREHOOK: type: QUERY -POSTHOOK: query: -- Base result for outer join -explain -select count(*) from -(select c.ctinyint - from alltypesorc c - left outer join alltypesorc cd - on cd.cint = c.cint) t1 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 3 - Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Map 3 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select count(*) from -(select c.ctinyint - from alltypesorc c - left outer join alltypesorc cd - on cd.cint = c.cint) t1 -PREHOOK: type: QUERY -PREHOOK: Input: default@alltypesorc -#### A masked pattern was here #### -POSTHOOK: query: select count(*) from -(select 
c.ctinyint - from alltypesorc c - left outer join alltypesorc cd - on cd.cint = c.cint) t1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@alltypesorc -#### A masked pattern was here #### -3155128 -PREHOOK: query: -- 32 partitions are created. 3 in memory, 29 on disk on creation. -explain -select count(*) from -(select c.ctinyint - from alltypesorc c - left outer join alltypesorc cd - on cd.cint = c.cint) t1 -PREHOOK: type: QUERY -POSTHOOK: query: -- 32 partitions are created. 3 in memory, 29 on disk on creation. -explain -select count(*) from -(select c.ctinyint - from alltypesorc c - left outer join alltypesorc cd - on cd.cint = c.cint) t1 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 3 - Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Map 3 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select count(*) from -(select c.ctinyint - from alltypesorc c - left outer join alltypesorc cd - on cd.cint = c.cint) t1 -PREHOOK: type: QUERY -PREHOOK: Input: default@alltypesorc -#### A masked pattern was here #### -POSTHOOK: query: select count(*) from -(select c.ctinyint - from alltypesorc c - left outer join alltypesorc cd - on cd.cint = c.cint) t1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@alltypesorc -#### A masked pattern was here #### -3155128 -PREHOOK: query: -- Partitioned table -create table parttbl (key string, value char(20)) partitioned by (dt 
char(10)) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@parttbl -POSTHOOK: query: -- Partitioned table -create table parttbl (key string, value char(20)) partitioned by (dt char(10)) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@parttbl -PREHOOK: query: insert overwrite table parttbl partition(dt='2000-01-01') - select * from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@parttbl@dt=2000-01-01 -POSTHOOK: query: insert overwrite table parttbl partition(dt='2000-01-01') - select * from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@parttbl@dt=2000-01-01 -POSTHOOK: Lineage: parttbl PARTITION(dt=2000-01-01).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: parttbl PARTITION(dt=2000-01-01).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: insert overwrite table parttbl partition(dt='2000-01-02') - select * from src1 -PREHOOK: type: QUERY -PREHOOK: Input: default@src1 -PREHOOK: Output: default@parttbl@dt=2000-01-02 -POSTHOOK: query: insert overwrite table parttbl partition(dt='2000-01-02') - select * from src1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src1 -POSTHOOK: Output: default@parttbl@dt=2000-01-02 -POSTHOOK: Lineage: parttbl PARTITION(dt=2000-01-02).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: parttbl PARTITION(dt=2000-01-02).value EXPRESSION [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: -- No spill, base result -explain -select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -PREHOOK: type: QUERY -POSTHOOK: query: -- No spill, base result -explain -select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: p1 - Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - input vertices: - 1 Map 3 - Statistics: Num rows: 289 Data size: 6872 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Map 3 - Map Operator Tree: - TableScan - alias: p2 - Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 263 Data size: 
6248 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -PREHOOK: type: QUERY -PREHOOK: Input: default@parttbl -PREHOOK: Input: default@parttbl@dt=2000-01-01 -PREHOOK: Input: default@parttbl@dt=2000-01-02 -#### A masked pattern was here #### -POSTHOOK: query: select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@parttbl -POSTHOOK: Input: default@parttbl@dt=2000-01-01 -POSTHOOK: Input: default@parttbl@dt=2000-01-02 -#### A masked pattern was here #### -1217 -PREHOOK: query: -- No spill, 2 partitions created in memory -explain -select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -PREHOOK: type: QUERY -POSTHOOK: query: -- No spill, 2 partitions created in memory -explain -select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: p1 - Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - input vertices: - 1 Map 3 - Statistics: Num rows: 289 Data size: 6872 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Map 3 - Map Operator Tree: - TableScan - alias: p2 - Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output 
Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -PREHOOK: type: QUERY -PREHOOK: Input: default@parttbl -PREHOOK: Input: default@parttbl@dt=2000-01-01 -PREHOOK: Input: default@parttbl@dt=2000-01-02 -#### A masked pattern was here #### -POSTHOOK: query: select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@parttbl -POSTHOOK: Input: default@parttbl@dt=2000-01-01 -POSTHOOK: Input: default@parttbl@dt=2000-01-02 -#### A masked pattern was here #### -1217 -PREHOOK: query: -- Spill case base result -explain -select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -PREHOOK: type: QUERY -POSTHOOK: query: -- Spill case base result -explain -select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: p1 - Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - input vertices: - 1 Map 3 - Statistics: Num rows: 289 Data size: 6872 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Map 3 - Map Operator Tree: - TableScan - alias: p2 - Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - 
limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -PREHOOK: type: QUERY -PREHOOK: Input: default@parttbl -PREHOOK: Input: default@parttbl@dt=2000-01-01 -PREHOOK: Input: default@parttbl@dt=2000-01-02 -#### A masked pattern was here #### -POSTHOOK: query: select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@parttbl -POSTHOOK: Input: default@parttbl@dt=2000-01-01 -POSTHOOK: Input: default@parttbl@dt=2000-01-02 -#### A masked pattern was here #### -1217 -PREHOOK: query: -- Spill case, one partition in memory, one spilled on creation -explain -select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -PREHOOK: type: QUERY -POSTHOOK: query: -- Spill case, one partition in memory, one spilled on creation -explain -select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: p1 - Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - input vertices: - 1 Map 3 - Statistics: Num rows: 289 Data size: 6872 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Map 3 - Map Operator Tree: - TableScan - alias: p2 - Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -PREHOOK: type: QUERY -PREHOOK: Input: default@parttbl -PREHOOK: Input: 
default@parttbl@dt=2000-01-01 -PREHOOK: Input: default@parttbl@dt=2000-01-02 -#### A masked pattern was here #### -POSTHOOK: query: select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@parttbl -POSTHOOK: Input: default@parttbl@dt=2000-01-01 -POSTHOOK: Input: default@parttbl@dt=2000-01-02 -#### A masked pattern was here #### -1217 -PREHOOK: query: drop table parttbl -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@parttbl -PREHOOK: Output: default@parttbl -POSTHOOK: query: drop table parttbl -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@parttbl -POSTHOOK: Output: default@parttbl -PREHOOK: query: -- Test vectorization --- Test case borrowed from vector_decimal_mapjoin.q -CREATE TABLE decimal_mapjoin STORED AS ORC AS - SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1, - CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2, - cint - FROM alltypesorc -PREHOOK: type: CREATETABLE_AS_SELECT -PREHOOK: Input: default@alltypesorc -PREHOOK: Output: database:default -PREHOOK: Output: default@decimal_mapjoin -POSTHOOK: query: -- Test vectorization --- Test case borrowed from vector_decimal_mapjoin.q -CREATE TABLE decimal_mapjoin STORED AS ORC AS - SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1, - CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2, - cint - FROM alltypesorc -POSTHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: Input: default@alltypesorc -POSTHOOK: Output: database:default -POSTHOOK: Output: default@decimal_mapjoin -PREHOOK: query: EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 - FROM decimal_mapjoin l - JOIN decimal_mapjoin r ON l.cint = r.cint - WHERE l.cint = 6981 -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 - FROM decimal_mapjoin l - JOIN decimal_mapjoin r ON l.cint = r.cint - WHERE l.cint = 6981 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez - Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: l - Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (cint = 6981) (type: boolean) - Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 6981 (type: int) - 1 6981 (type: int) - outputColumnNames: _col1, _col9 - input vertices: - 1 Map 2 - Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 6981 (type: int), 6981 (type: int), _col1 (type: decimal(20,10)), _col9 (type: decimal(23,14)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized - Map 2 - Map Operator Tree: - TableScan - alias: r - Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (cint 
= 6981) (type: boolean) - Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: 6981 (type: int) - sort order: + - Map-reduce partition columns: 6981 (type: int) - Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE - value expressions: cdecimal2 (type: decimal(23,14)) - Execution mode: vectorized - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 - FROM decimal_mapjoin l - JOIN decimal_mapjoin r ON l.cint = r.cint - WHERE l.cint = 6981 -PREHOOK: type: QUERY -PREHOOK: Input: default@decimal_mapjoin -#### A masked pattern was here #### -POSTHOOK: query: SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 - FROM decimal_mapjoin l - JOIN decimal_mapjoin r ON l.cint = r.cint - WHERE l.cint = 6981 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@decimal_mapjoin -#### A masked pattern was here #### -6981 6981 NULL NULL -6981 6981 NULL -617.5607769230769 -6981 6981 NULL -617.5607769230769 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL 6984454.211097692 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL -617.5607769230769 -6981 6981 NULL -617.5607769230769 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL 6984454.211097692 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL -617.5607769230769 -6981 6981 NULL -617.5607769230769 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL 6984454.211097692 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL -617.5607769230769 -6981 6981 NULL -617.5607769230769 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL 6984454.211097692 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 5831542.269248378 NULL -6981 6981 5831542.269248378 -617.5607769230769 -6981 6981 5831542.269248378 -617.5607769230769 -6981 6981 5831542.269248378 NULL -6981 6981 5831542.269248378 NULL -6981 6981 5831542.269248378 NULL -6981 6981 5831542.269248378 6984454.211097692 -6981 6981 5831542.269248378 NULL -6981 6981 5831542.269248378 NULL -6981 6981 5831542.269248378 NULL -6981 6981 NULL NULL -6981 6981 NULL -617.5607769230769 -6981 6981 NULL -617.5607769230769 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL 6984454.211097692 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL -617.5607769230769 -6981 6981 NULL -617.5607769230769 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL 6984454.211097692 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL -617.5607769230769 -6981 6981 NULL -617.5607769230769 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL 6984454.211097692 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 -515.621072973 NULL -6981 6981 -515.621072973 -617.5607769230769 -6981 6981 -515.621072973 -617.5607769230769 -6981 6981 -515.621072973 NULL -6981 6981 -515.621072973 NULL -6981 6981 -515.621072973 NULL -6981 6981 -515.621072973 6984454.211097692 -6981 6981 -515.621072973 NULL -6981 6981 -515.621072973 NULL -6981 6981 -515.621072973 NULL -6981 6981 -515.621072973 NULL -6981 6981 -515.621072973 
-617.5607769230769 -6981 6981 -515.621072973 -617.5607769230769 -6981 6981 -515.621072973 NULL -6981 6981 -515.621072973 NULL -6981 6981 -515.621072973 NULL -6981 6981 -515.621072973 6984454.211097692 -6981 6981 -515.621072973 NULL -6981 6981 -515.621072973 NULL -6981 6981 -515.621072973 NULL -PREHOOK: query: EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 - FROM decimal_mapjoin l - JOIN decimal_mapjoin r ON l.cint = r.cint - WHERE l.cint = 6981 -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 - FROM decimal_mapjoin l - JOIN decimal_mapjoin r ON l.cint = r.cint - WHERE l.cint = 6981 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez - Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: l - Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (cint = 6981) (type: boolean) - Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 6981 (type: int) - 1 6981 (type: int) - outputColumnNames: _col1, _col9 - input vertices: - 1 Map 2 - Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Select Operator - expressions: 6981 (type: int), 6981 (type: int), _col1 (type: decimal(20,10)), _col9 (type: decimal(23,14)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized - Map 2 - Map Operator Tree: - TableScan - alias: r - Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (cint = 6981) (type: boolean) - Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: 6981 (type: int) - sort order: + - Map-reduce partition columns: 6981 (type: int) - Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE - value expressions: cdecimal2 (type: decimal(23,14)) - Execution mode: vectorized - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 - FROM decimal_mapjoin l - JOIN decimal_mapjoin r ON l.cint = r.cint - WHERE l.cint = 6981 -PREHOOK: type: QUERY -PREHOOK: Input: default@decimal_mapjoin -#### A masked pattern was here #### -POSTHOOK: query: SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 - FROM decimal_mapjoin l - JOIN decimal_mapjoin r ON l.cint = r.cint - WHERE l.cint = 6981 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@decimal_mapjoin -#### A masked pattern was here #### -6981 6981 NULL NULL -6981 6981 NULL -617.5607769230769 -6981 6981 NULL -617.5607769230769 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL 6984454.211097692 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 
6981 NULL -617.5607769230769 -6981 6981 NULL -617.5607769230769 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL 6984454.211097692 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL -617.5607769230769 -6981 6981 NULL -617.5607769230769 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL 6984454.211097692 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL -617.5607769230769 -6981 6981 NULL -617.5607769230769 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL 6984454.211097692 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 5831542.269248378 NULL -6981 6981 5831542.269248378 -617.5607769230769 -6981 6981 5831542.269248378 -617.5607769230769 -6981 6981 5831542.269248378 NULL -6981 6981 5831542.269248378 NULL -6981 6981 5831542.269248378 NULL -6981 6981 5831542.269248378 6984454.211097692 -6981 6981 5831542.269248378 NULL -6981 6981 5831542.269248378 NULL -6981 6981 5831542.269248378 NULL -6981 6981 NULL NULL -6981 6981 NULL -617.5607769230769 -6981 6981 NULL -617.5607769230769 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL 6984454.211097692 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL -617.5607769230769 -6981 6981 NULL -617.5607769230769 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL 6984454.211097692 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL -617.5607769230769 -6981 6981 NULL -617.5607769230769 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL 6984454.211097692 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 -515.621072973 NULL -6981 6981 -515.621072973 -617.5607769230769 -6981 6981 -515.621072973 -617.5607769230769 -6981 6981 -515.621072973 NULL -6981 6981 -515.621072973 NULL -6981 6981 -515.621072973 NULL -6981 6981 -515.621072973 6984454.211097692 -6981 6981 -515.621072973 NULL -6981 6981 -515.621072973 NULL -6981 6981 -515.621072973 NULL -6981 6981 -515.621072973 NULL -6981 6981 -515.621072973 -617.5607769230769 -6981 6981 -515.621072973 -617.5607769230769 -6981 6981 -515.621072973 NULL -6981 6981 -515.621072973 NULL -6981 6981 -515.621072973 NULL -6981 6981 -515.621072973 6984454.211097692 -6981 6981 -515.621072973 NULL -6981 6981 -515.621072973 NULL -6981 6981 -515.621072973 NULL -PREHOOK: query: DROP TABLE decimal_mapjoin -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@decimal_mapjoin -PREHOOK: Output: default@decimal_mapjoin -POSTHOOK: query: DROP TABLE decimal_mapjoin -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@decimal_mapjoin -POSTHOOK: Output: default@decimal_mapjoin
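
Note for readers of the golden files above: the embedded test comments describe every phase of the feature under test — partitions kept in memory versus spilled on creation ("one partition in memory, one spilled on creation"), a spill during the first probe round ("1 partition is spilled during first round processing"), and a second pass over spilled data that must still reproduce the baseline counts (428, 5680, 3152013, 3155128, 1217). As a reading aid, here is a minimal, self-contained Java sketch of that hybrid grace hash join flow. It is illustrative only: ToyHybridGraceHashJoin, Partition, NUM_PARTITIONS, MEM_BUDGET, and the tab-separated spill/matchfile format are all invented for this sketch and are not Hive's actual classes, configuration values, or file formats.

// Toy sketch (NOT Hive code) of a hybrid grace hash join:
// build side is hash-partitioned; partitions that do not fit the memory
// budget are spilled to disk; probe rows hitting a spilled partition are
// parked in a per-partition matchfile and joined in a second round.
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.*;
import java.util.*;

public class ToyHybridGraceHashJoin {
    static final int NUM_PARTITIONS = 4; // invented for the demo
    static final int MEM_BUDGET = 3;     // invented: max in-memory build rows

    static class Partition {
        Map<String, List<String>> hashMap = new HashMap<>(); // in-memory build side
        Path spilledBuild; // build rows written to disk ("spilled on creation" or later)
        Path matchFile;    // probe rows parked here during the first round
        boolean onDisk() { return spilledBuild != null; }
    }

    public static void main(String[] args) throws IOException {
        String[][] small = { {"1","a"}, {"2","b"}, {"3","c"}, {"4","d"}, {"5","e"} };
        String[][] big   = { {"1","x"}, {"4","y"}, {"2","z"}, {"5","w"} };
        Partition[] parts = new Partition[NUM_PARTITIONS];
        for (int i = 0; i < NUM_PARTITIONS; i++) parts[i] = new Partition();

        // Build phase: hash each small-table row to a partition; once the memory
        // budget is exceeded, pick a victim partition and spill it whole to disk.
        int inMemRows = 0;
        for (String[] row : small) {
            Partition p = parts[bucket(row[0])];
            if (p.onDisk()) { append(p.spilledBuild, row); continue; } // already spilled
            p.hashMap.computeIfAbsent(row[0], k -> new ArrayList<>()).add(row[1]);
            if (++inMemRows > MEM_BUDGET) {
                Partition victim = largestInMem(parts);
                victim.spilledBuild = spillMap(victim.hashMap);
                inMemRows -= drain(victim);
            }
        }

        // First probe round: rows hitting an in-memory partition join immediately;
        // rows hitting a spilled partition go to that partition's matchfile.
        for (String[] row : big) {
            Partition p = parts[bucket(row[0])];
            if (p.onDisk()) {
                if (p.matchFile == null) p.matchFile = Files.createTempFile("match", ".txt");
                append(p.matchFile, row);
            } else {
                emit(p.hashMap, row);
            }
        }

        // Second round: reload each spilled partition's build rows into a fresh
        // hash map and replay the probe rows parked in its matchfile.
        for (Partition p : parts) {
            if (!p.onDisk() || p.matchFile == null) continue;
            Map<String, List<String>> reloaded = loadMap(p.spilledBuild);
            for (String line : Files.readAllLines(p.matchFile, StandardCharsets.UTF_8))
                emit(reloaded, line.split("\t", 2));
        }
    }

    static int bucket(String key) { return Math.floorMod(key.hashCode(), NUM_PARTITIONS); }

    static void append(Path f, String[] row) throws IOException {
        Files.write(f, (row[0] + "\t" + row[1] + "\n").getBytes(StandardCharsets.UTF_8),
            StandardOpenOption.CREATE, StandardOpenOption.APPEND);
    }

    static Path spillMap(Map<String, List<String>> m) throws IOException {
        Path f = Files.createTempFile("build", ".txt");
        for (Map.Entry<String, List<String>> e : m.entrySet())
            for (String v : e.getValue()) append(f, new String[] { e.getKey(), v });
        return f;
    }

    static int drain(Partition p) { // clear a spilled partition's in-memory rows
        int n = 0;
        for (List<String> vs : p.hashMap.values()) n += vs.size();
        p.hashMap.clear();
        return n;
    }

    static Partition largestInMem(Partition[] parts) {
        Partition best = null;
        for (Partition p : parts)
            if (!p.onDisk() && (best == null || p.hashMap.size() > best.hashMap.size()))
                best = p;
        return best;
    }

    static Map<String, List<String>> loadMap(Path f) throws IOException {
        Map<String, List<String>> m = new HashMap<>();
        for (String line : Files.readAllLines(f, StandardCharsets.UTF_8)) {
            String[] kv = line.split("\t", 2);
            m.computeIfAbsent(kv[0], k -> new ArrayList<>()).add(kv[1]);
        }
        return m;
    }

    static void emit(Map<String, List<String>> map, String[] row) {
        for (String v : map.getOrDefault(row[0], Collections.emptyList()))
            System.out.println("key=" + row[0] + " big=" + row[1] + " small=" + v);
    }
}

With the demo inputs, the rows for keys 1, 4, and 5 hash to partitions that were spilled during the build, so they are matched only in the second round, while key 2 joins in the first round; all four matches are still printed. That is exactly the invariant the spill variants of the queries above check: the plans differ only by the HybridGraceHashJoin: true marker, and the result counts must match the base results.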