diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index c9ee423..58fc867 100644
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -731,6 +731,10 @@ public void setSparkConfigUpdated(boolean isSparkConfigUpdated) {
     HIVEHYBRIDGRACEHASHJOINMEMCHECKFREQ("hive.mapjoin.hybridgrace.memcheckfrequency", 1024, "For " +
         "hybrid grace hash join, how often (how many rows apart) we check if memory is full. " +
         "This number should be power of 2."),
+    HIVEHYBRIDGRACEHASHJOINMINWBSIZE("hive.mapjoin.hybridgrace.minwbsize", 512, "For hybrid grace" +
+        " hash join, the minimum write buffer size used by optimized hashtable."),
+    HIVEHYBRIDGRACEHASHJOINMINNUMPARTITIONS("hive.mapjoin.hybridgrace.minnumpartitions", 16, "For" +
+        " hybrid grace hash join, the minimum number of partitions to create."),
     HIVEHASHTABLEWBSIZE("hive.mapjoin.optimized.hashtable.wbsize", 10 * 1024 * 1024,
         "Optimized hashtable (see hive.mapjoin.optimized.hashtable) uses a chain of buffers to\n" +
         "store data. This is one buffer size. HT may be slightly faster if this is larger, but for small\n" +
diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties
index c3a1dcd..413c3a3 100644
--- itests/src/test/resources/testconfiguration.properties
+++ itests/src/test/resources/testconfiguration.properties
@@ -300,7 +300,8 @@ minitez.query.files=bucket_map_join_tez1.q,\
   dynamic_partition_pruning_2.q,\
   explainuser_1.q,\
   explainuser_2.q,\
-  hybridhashjoin.q,\
+  hybridgrace_hashjoin_1.q,\
+  hybridgrace_hashjoin_2.q,\
   mapjoin_decimal.q,\
   lvj_mapjoin.q, \
   mrr.q,\
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableLoader.java ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableLoader.java
index c3e3078..cbf2d43 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableLoader.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableLoader.java
@@ -32,6 +32,6 @@
   void init(ExecMapperContext context, MapredContext mrContext, Configuration hconf,
       MapJoinOperator joinOp);
 
-  void load(MapJoinTableContainer[] mapJoinTables, MapJoinTableContainerSerDe[] mapJoinTableSerdes,
-      long memUsage) throws HiveException;
+  void load(MapJoinTableContainer[] mapJoinTables, MapJoinTableContainerSerDe[] mapJoinTableSerdes)
+      throws HiveException;
 }
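The load() signature change above removes the caller-computed memUsage; each engine-specific loader now derives its own budget from configuration. A minimal sketch of that derivation, assuming only standard HiveConf accessors (the class and method names here are illustrative, not part of the patch):

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;

// Hypothetical helper showing where a HashTableLoader implementation can
// now obtain its memory budget and the new hybrid grace hash join knobs.
public final class LoaderBudgetSketch {
  public static long memoryBudget(Configuration hconf) {
    // Same threshold the Tez loader uses further down in this patch
    return HiveConf.getLongVar(
        hconf, HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD);
  }

  public static int minWriteBufferSize(Configuration hconf) {
    // New config added above; defaults to 512
    return HiveConf.getIntVar(
        hconf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMINWBSIZE);
  }

  public static int minNumPartitions(Configuration hconf) {
    // New config added above; defaults to 16
    return HiveConf.getIntVar(
        hconf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMINNUMPARTITIONS);
  }
}
```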
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
index 1665772..e519901 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
@@ -35,7 +35,6 @@
 import org.apache.hadoop.hive.common.ObjectPair;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.HashTableLoaderFactory;
-import org.apache.hadoop.hive.ql.exec.mapjoin.MapJoinMemoryExhaustionHandler;
 import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext;
 import org.apache.hadoop.hive.ql.exec.persistence.BytesBytesMultiHashMap;
 import org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer;
@@ -89,9 +88,10 @@
   private UnwrapRowContainer[] unwrapContainer;
   private transient Configuration hconf;
   private transient boolean hybridMapJoinLeftover;  // whether there's spilled data to be processed
-  protected transient MapJoinBytesTableContainer currentSmallTable; // reloaded hashmap from disk
-  protected transient int tag;        // big table alias
-  protected transient int smallTable; // small table alias
+  protected transient MapJoinBytesTableContainer[] spilledMapJoinTables;  // used to hold restored
+                                                                          // spilled small tables
+  protected HybridHashTableContainer firstSmallTable; // The first small table;
+                                                      // Only this table has spilled big table rows
 
   public MapJoinOperator() {
   }
@@ -272,7 +272,6 @@ public void generateMapMetaData() throws HiveException {
   protected Pair<MapJoinTableContainer[], MapJoinTableContainerSerDe[]> loadHashTable(
       ExecMapperContext mapContext, MapredContext mrContext) throws HiveException {
-
     loadCalled = true;
 
     if (this.hashTblInitedOnce
@@ -285,9 +284,7 @@ public void generateMapMetaData() throws HiveException {
     perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.LOAD_HASHTABLE);
     loader.init(mapContext, mrContext, hconf, this);
-    long memUsage = (long)(MapJoinMemoryExhaustionHandler.getMaxHeapSize()
-        * conf.getHashTableMemoryUsage());
-    loader.load(mapJoinTables, mapJoinTableSerdes, memUsage);
+    loader.load(mapJoinTables, mapJoinTableSerdes);
 
     hashTblInitedOnce = true;
@@ -325,18 +322,6 @@ protected MapJoinKey getRefKey(byte alias) {
 
   @Override
   public void process(Object row, int tag) throws HiveException {
-    this.tag = tag;
-
-    // As we're calling processOp again to process the leftover triplets, we know the "row" is
-    // coming from the on-disk matchfile. We need to recreate hashMapRowGetter against new hashtable
-    if (hybridMapJoinLeftover) {
-      assert hashMapRowGetters != null;
-      if (hashMapRowGetters[smallTable] == null) {
-        MapJoinKey refKey = getRefKey((byte) tag);
-        hashMapRowGetters[smallTable] = currentSmallTable.createGetter(refKey);
-      }
-    }
-
     try {
       alias = (byte) tag;
       if (hashMapRowGetters == null) {
@@ -349,13 +334,24 @@ public void process(Object row, int tag) throws HiveException {
         }
       }
 
+      // As we're calling processOp again to process the leftover triplets, we know the "row" is
+      // coming from the spilled matchfile. We need to recreate hashMapRowGetter against new hashtables
+      if (hybridMapJoinLeftover) {
+        MapJoinKey refKey = getRefKey(alias);
+        for (byte pos = 0; pos < order.length; pos++) {
+          if (pos != alias && spilledMapJoinTables[pos] != null) {
+            hashMapRowGetters[pos] = spilledMapJoinTables[pos].createGetter(refKey);
+          }
+        }
+      }
+
       // compute keys and values as StandardObjects
       ReusableGetAdaptor firstSetKey = null;
       int fieldCount = joinKeys[alias].size();
       boolean joinNeeded = false;
+      boolean bigTableRowSpilled = false;
       for (byte pos = 0; pos < order.length; pos++) {
         if (pos != alias) {
-          smallTable = pos; // record small table alias
           JoinUtil.JoinResult joinResult;
           ReusableGetAdaptor adaptor;
           if (firstSetKey == null) {
@@ -390,8 +386,10 @@ public void process(Object row, int tag) throws HiveException {
             aliasFilterTags[pos] = rowContainer.getAliasFilter();
           }
           // Spill the big table rows into appropriate partition
-          if (joinResult == JoinUtil.JoinResult.SPILL) {
+          if (joinResult == JoinUtil.JoinResult.SPILL &&
+              !bigTableRowSpilled) {  // For n-way join, only spill big table rows once
             spillBigTableRow(mapJoinTables[pos], row);
+            bigTableRowSpilled = true;
           }
         }
       }
@@ -431,7 +429,6 @@ protected void spillBigTableRow(MapJoinTableContainer hybridHtContainer, Object
 
   @Override
   public void closeOp(boolean abort) throws HiveException {
-
     boolean spilled = false;
     for (MapJoinTableContainer container : mapJoinTables) {
       if (container != null) {
@@ -440,10 +437,27 @@ public void closeOp(boolean abort) throws HiveException {
       }
     }
 
+    // For Hybrid Grace Hash Join, we need to see if there is any spilled data to be processed next
     if (spilled) {
-      for (MapJoinTableContainer tableContainer : mapJoinTables) {
-        if (tableContainer != null) {
-          if (tableContainer instanceof HybridHashTableContainer) {
+      int numPartitions = 0;
+      // Find out number of partitions for each small table (should be same across tables)
+      for (byte pos = 0; pos < mapJoinTables.length; pos++) {
+        if (pos != alias) {
+          firstSmallTable = (HybridHashTableContainer) mapJoinTables[pos];
+          numPartitions = firstSmallTable.getHashPartitions().length;
+          break;
+        }
+      }
+      assert numPartitions != 0 : "Number of partitions must be greater than 0!";
+
+      if (firstSmallTable.hasSpill()) {
+        spilledMapJoinTables = new MapJoinBytesTableContainer[mapJoinTables.length];
+        hybridMapJoinLeftover = true;
+
+        // Clear all in-memory partitions first
+        for (byte pos = 0; pos < mapJoinTables.length; pos++) {
+          MapJoinTableContainer tableContainer = mapJoinTables[pos];
+          if (tableContainer != null && tableContainer instanceof HybridHashTableContainer) {
             HybridHashTableContainer hybridHtContainer = (HybridHashTableContainer) tableContainer;
             hybridHtContainer.dumpStats();
@@ -453,29 +467,30 @@
               if (!hashPartitions[i].isHashMapOnDisk()) {
                 hybridHtContainer.setTotalInMemRowCount(
                     hybridHtContainer.getTotalInMemRowCount() -
-                    hashPartitions[i].getHashMapFromMemory().getNumValues());
+                        hashPartitions[i].getHashMapFromMemory().getNumValues());
                 hashPartitions[i].getHashMapFromMemory().clear();
               }
             }
             assert hybridHtContainer.getTotalInMemRowCount() == 0;
+          }
+        }
 
-            for (int i = 0; i < hashPartitions.length; i++) {
-              if (hashPartitions[i].isHashMapOnDisk()) {
-                // Recursively process on-disk triplets (hash partition, sidefile, matchfile)
-                try {
-                  hybridMapJoinLeftover = true;
-                  hashMapRowGetters[smallTable] = null;
-                  continueProcess(hashPartitions[i], hybridHtContainer);
-                } catch (IOException e) {
-                  e.printStackTrace();
-                } catch (ClassNotFoundException e) {
-                  e.printStackTrace();
-                } catch (SerDeException e) {
-                  e.printStackTrace();
-                }
-              }
-              hybridMapJoinLeftover = false;
-              currentSmallTable = null;
+        // Reprocess the spilled data
+        for (int i = 0; i < numPartitions; i++) {
+          HashPartition[] hashPartitions = firstSmallTable.getHashPartitions();
+          if (hashPartitions[i].isHashMapOnDisk()) {
+            try {
+              continueProcess(i);     // Re-process spilled data
+            } catch (IOException e) {
+              e.printStackTrace();
+            } catch (SerDeException e) {
+              e.printStackTrace();
+            } catch (ClassNotFoundException e) {
+              e.printStackTrace();
+            }
+            for (byte pos = 0; pos < order.length; pos++) {
+              if (pos != alias)
+                spilledMapJoinTables[pos] = null;
             }
           }
         }
@@ -496,18 +511,20 @@ public void closeOp(boolean abort) throws HiveException {
   }
 
   /**
-   * Continue processing each pair of spilled hashtable and big table row container
-   * @param partition hash partition to process
-   * @param hybridHtContainer Hybrid hashtable container
+   * Continue processing join between spilled hashtable(s) and spilled big table
+   * @param partitionId the partition number across all small tables to process
    * @throws HiveException
    * @throws IOException
-   * @throws ClassNotFoundException
    * @throws SerDeException
    */
-  private void continueProcess(HashPartition partition, HybridHashTableContainer hybridHtContainer)
-      throws HiveException, IOException, ClassNotFoundException, SerDeException {
-    reloadHashTable(partition, hybridHtContainer);
-    reProcessBigTable(partition);
+  private void continueProcess(int partitionId)
+      throws HiveException, IOException, SerDeException, ClassNotFoundException {
+    for (byte pos = 0; pos < mapJoinTables.length; pos++) {
+      if (pos != alias) {
+        reloadHashTable(pos, partitionId);
+      }
+    }
+    reProcessBigTable(partitionId);
   }
 
   /**
@@ -515,16 +532,16 @@
    * It can have two steps:
    * 1) Deserialize a serialized hash table, and
    * 2) Merge every key/value pair from small table container into the hash table
-   * @param partition hash partition to process
-   * @param hybridHtContainer Hybrid hashtable container
+   * @param pos position of small table
+   * @param partitionId the partition of the small table to be reloaded from
    * @throws IOException
-   * @throws ClassNotFoundException
    * @throws HiveException
    * @throws SerDeException
    */
-  protected void reloadHashTable(HashPartition partition,
-      HybridHashTableContainer hybridHtContainer)
-      throws IOException, ClassNotFoundException, HiveException, SerDeException {
+  protected void reloadHashTable(byte pos, int partitionId)
+      throws IOException, HiveException, SerDeException, ClassNotFoundException {
+    HybridHashTableContainer container = (HybridHashTableContainer) mapJoinTables[pos];
+    HashPartition partition = container.getHashPartitions()[partitionId];
 
     // Merge the sidefile into the newly created hash table
     // This is where the spilling may happen again
@@ -543,11 +560,12 @@ protected void reloadHashTable(HashPartition partition,
     // If based on the new key count, keyCount is smaller than a threshold,
     // then just load the entire restored hashmap into memory.
     // The size of deserialized partition shouldn't exceed half of memory limit
-    if (rowCount * hybridHtContainer.getTableRowSize() >= hybridHtContainer.getMemoryThreshold() / 2) {
-      LOG.info("Hybrid Grace Hash Join: Hash table reload can fail since it will be greater than memory limit. Recursive spilling is currently not supported");
+    if (rowCount * container.getTableRowSize() >= container.getMemoryThreshold() / 2) {
+      LOG.warn("Hybrid Grace Hash Join: Hash table cannot be reloaded since it" +
+          " will be greater than memory limit. Recursive spilling is currently not supported");
     }
 
-    KeyValueHelper writeHelper = hybridHtContainer.getWriteHelper();
+    KeyValueHelper writeHelper = container.getWriteHelper();
     while (kvContainer.hasNext()) {
       ObjectPair<HiveKey, BytesWritable> pair = kvContainer.next();
       Writable key = pair.getFirst();
@@ -556,27 +574,26 @@ protected void reloadHashTable(HashPartition partition,
       restoredHashMap.put(writeHelper, -1);
     }
 
-    hybridHtContainer.setTotalInMemRowCount(hybridHtContainer.getTotalInMemRowCount()
+    container.setTotalInMemRowCount(container.getTotalInMemRowCount()
         + restoredHashMap.getNumValues() + kvContainer.size());
     kvContainer.clear();
 
-    // Since there's only one hashmap to deal with, it's OK to create a MapJoinBytesTableContainer
-    currentSmallTable = new MapJoinBytesTableContainer(restoredHashMap);
-    currentSmallTable.setInternalValueOi(hybridHtContainer.getInternalValueOi());
-    currentSmallTable.setSortableSortOrders(hybridHtContainer.getSortableSortOrders());
+    spilledMapJoinTables[pos] = new MapJoinBytesTableContainer(restoredHashMap);
+    spilledMapJoinTables[pos].setInternalValueOi(container.getInternalValueOi());
+    spilledMapJoinTables[pos].setSortableSortOrders(container.getSortableSortOrders());
   }
 
   /**
    * Iterate over the big table row container and feed process() with leftover rows
-   * @param partition the hash partition being brought back to memory at the moment
+   * @param partitionId the partition from which to take out spilled big table rows
    * @throws HiveException
-   * @throws IOException
    */
-  protected void reProcessBigTable(HashPartition partition) throws HiveException, IOException {
+  protected void reProcessBigTable(int partitionId) throws HiveException {
+    HashPartition partition = firstSmallTable.getHashPartitions()[partitionId];
     ObjectContainer bigTable = partition.getMatchfileObjContainer();
     while (bigTable.hasNext()) {
      Object row = bigTable.next();
-      process(row, tag);
+      process(row, alias);
     }
    bigTable.clear();
   }
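With the per-table state replaced by spilledMapJoinTables and firstSmallTable, the re-processing in closeOp() boils down to one reload-and-replay cycle per on-disk partition. A condensed, illustrative sketch of that control flow (the helper method is hypothetical; reloadHashTable and reProcessBigTable are the methods shown above):

```java
// Condensed view of the reprocessing loop in closeOp(); not the literal body.
void reprocessSpilledPartitions(int numPartitions) throws Exception {
  for (int partitionId = 0; partitionId < numPartitions; partitionId++) {
    if (!firstSmallTable.getHashPartitions()[partitionId].isHashMapOnDisk()) {
      continue; // this partition stayed in memory and was joined on the fly
    }
    // Restore every small table's hashmap for this partition id
    for (byte pos = 0; pos < mapJoinTables.length; pos++) {
      if (pos != alias) {
        reloadHashTable(pos, partitionId);
      }
    }
    // Replay the big-table rows that were parked in the matchfile
    reProcessBigTable(partitionId);
  }
}
```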
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/mr/HashTableLoader.java ql/src/java/org/apache/hadoop/hive/ql/exec/mr/HashTableLoader.java
index 96a6728..abf38e4 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/mr/HashTableLoader.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/mr/HashTableLoader.java
@@ -72,7 +72,7 @@ public void init(ExecMapperContext context, MapredContext mrContext, Configurati
   @Override
   public void load(
       MapJoinTableContainer[] mapJoinTables,
-      MapJoinTableContainerSerDe[] mapJoinTableSerdes, long memUsage) throws HiveException {
+      MapJoinTableContainerSerDe[] mapJoinTableSerdes) throws HiveException {
 
     String currentInputPath = context.getCurrentInputPath().toString();
     LOG.info("******* Load from HashTable for input file: " + currentInputPath);
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java
index dd5c621..2ba622e 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java
@@ -566,6 +566,7 @@ public void clear() {
     this.writeBuffers.clear();
     this.refs = new long[1];
     this.keysAssigned = 0;
+    this.numValues = 0;
   }
 
   public void expandAndRehashToTarget(int estimateNewRowCount) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableConf.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableConf.java
new file mode 100644
index 0000000..625038f
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableConf.java
@@ -0,0 +1,86 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.persistence;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * This conf class is a wrapper of a list of HybridHashTableContainers and some common info shared
+ * among them, which is used in n-way join (multiple small tables are involved).
+ */
+public class HybridHashTableConf {
+  private List<HybridHashTableContainer> loadedContainerList; // A list of already loaded containers
+  private int numberOfPartitions = 0;   // Number of partitions each table should have
+  private int nextSpillPartition = -1;  // The partition to be spilled next
+
+  public HybridHashTableConf() {
+    loadedContainerList = new ArrayList<HybridHashTableContainer>();
+  }
+
+  public int getNumberOfPartitions() {
+    return numberOfPartitions;
+  }
+
+  public void setNumberOfPartitions(int numberOfPartitions) {
+    this.numberOfPartitions = numberOfPartitions;
+    this.nextSpillPartition = numberOfPartitions - 1;
+  }
+
+  public int getNextSpillPartition() {
+    return this.nextSpillPartition;
+  }
+
+  public void setNextSpillPartition(int nextSpillPartition) {
+    this.nextSpillPartition = nextSpillPartition;
+  }
+
+
+  public List<HybridHashTableContainer> getLoadedContainerList() {
+    return loadedContainerList;
+  }
+
+  /**
+   * Spill one in-memory partition from tail for all previously loaded HybridHashTableContainers.
+   * Also mark that partition number as spill-on-creation for future created containers.
+   * @return amount of memory freed; 0 if only one last partition is in memory for each container
+   */
+  public long spill() throws IOException {
+    if (nextSpillPartition == 0) {
+      return 0;
+    }
+    long memFreed = 0;
+    for (HybridHashTableContainer container : loadedContainerList) {
+      memFreed += container.spillPartition(nextSpillPartition);
+      container.setSpill(true);
+    }
+    nextSpillPartition--;
+    return memFreed;
+  }
+
+  /**
+   * Check if a partition should be spilled directly on creation
+   * @param partitionId the partition to create
+   * @return true if it should be spilled directly, false otherwise
+   */
+  public boolean doSpillOnCreation(int partitionId) {
+    return nextSpillPartition != -1 && partitionId > nextSpillPartition;
+  }
+}
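The new HybridHashTableConf lets all small tables of one n-way join agree on a partition count and spill partitions in lockstep, from the highest id downward. A hypothetical usage sketch (the method and the sizes are made up for illustration; the real call sites are in HybridHashTableContainer below):

```java
import java.io.IOException;
import org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableConf;

// Illustrative only: how a loader could free memory via the shared conf.
void makeRoom(HybridHashTableConf nwayConf, long available, long needed)
    throws IOException {
  while (available < needed) {
    long freed = nwayConf.spill(); // spills the tail partition of every loaded container
    if (freed == 0) {
      throw new IOException("Only one in-memory partition left per container");
    }
    available += freed;
  }
  // Containers created afterwards pre-spill the same tail partitions:
  // with 16 partitions and one spill() call, doSpillOnCreation(15) is true.
}
```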
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
index cb9083d..714cdfc 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
@@ -72,15 +72,18 @@
   private static final Log LOG = LogFactory.getLog(HybridHashTableContainer.class);
 
   private final HashPartition[] hashPartitions; // an array of partitions holding the triplets
-  private int totalInMemRowCount = 0;           // total number of small table rows in memory
-  private final long memoryThreshold;           // the max memory limit allocated
+  private int totalInMemRowCount = 0;           // total number of small table rows in memory
+  private long memoryThreshold;                 // the max memory limit that can be allocated
+  private long memoryUsed;                      // the actual memory used
+  private int writeBufferSize;                  // write buffer size for this HybridHashTableContainer
   private final long tableRowSize;              // row size of the small table
-  private boolean isSpilled;                    // whether there's any spilled partition
-  private int toSpillPartitionId;               // the partition into which to spill the big table row;
-                                                // This may change after every setMapJoinKey call
-  private int numPartitionsSpilled;             // number of spilled partitions
-  private boolean lastPartitionInMem;           // only one (last one) partition is left in memory
+  private boolean isSpilled;                    // whether there's any spilled partition
+  private int toSpillPartitionId;               // the partition into which to spill the big table row;
+                                                // This may change after every setMapJoinKey call
+  private int numPartitionsSpilled;             // number of spilled partitions
+  private boolean lastPartitionInMem;           // only one (last one) partition is left in memory
   private final int memoryCheckFrequency;       // how often (# of rows apart) to check if memory is full
+  private HybridHashTableConf nwayConf;         // configuration for n-way join
 
   /** The OI used to deserialize values. We never deserialize keys. */
   private LazyBinaryStructObjectInspector internalValueOi;
@@ -182,53 +185,91 @@ public boolean isHashMapOnDisk() {
     }
   }
 
-  public HybridHashTableContainer(Configuration hconf, long keyCount, long memUsage, long tableSize)
-      throws SerDeException {
+  public HybridHashTableContainer(Configuration hconf, long keyCount, long memoryAvailable,
+                                  long estimatedTableSize, HybridHashTableConf nwayConf)
+      throws SerDeException, IOException {
     this(HiveConf.getFloatVar(hconf, HiveConf.ConfVars.HIVEHASHTABLEKEYCOUNTADJUSTMENT),
         HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHASHTABLETHRESHOLD),
         HiveConf.getFloatVar(hconf, HiveConf.ConfVars.HIVEHASHTABLELOADFACTOR),
-        HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHASHTABLEWBSIZE),
-        HiveConf.getLongVar(hconf, HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD),
         HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMEMCHECKFREQ),
-        tableSize, keyCount, memUsage);
+        HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMINWBSIZE),
+        HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMINNUMPARTITIONS),
+        estimatedTableSize, keyCount, memoryAvailable, nwayConf);
   }
 
-  private HybridHashTableContainer(float keyCountAdj, int threshold, float loadFactor, int wbSize,
-                                   long noConditionalTaskThreshold, int memCheckFreq, long tableSize,
-                                   long keyCount, long memUsage) throws SerDeException {
-
-    if (wbSize > noConditionalTaskThreshold) {
-      LOG.warn("adjusting hash table write buffer size to be smaller than noconditionaltasksize");
-      wbSize = (int) noConditionalTaskThreshold;
-    }
-
+  private HybridHashTableContainer(float keyCountAdj, int threshold, float loadFactor,
+                                   int memCheckFreq, int minWbSize, int minNumParts,
+                                   long estimatedTableSize, long keyCount,
+                                   long memoryAvailable, HybridHashTableConf nwayConf)
+      throws SerDeException, IOException {
     directWriteHelper = new MapJoinBytesTableContainer.DirectKeyValueWriter();
     int newKeyCount = HashMapWrapper.calculateTableSize(
         keyCountAdj, threshold, loadFactor, keyCount);
 
-    memoryThreshold = noConditionalTaskThreshold;
-    tableRowSize = tableSize / newKeyCount;
+    memoryThreshold = memoryAvailable;
+    tableRowSize = estimatedTableSize / keyCount;
     memoryCheckFrequency = memCheckFreq;
 
-    int numPartitions = calcNumPartitions(tableSize, wbSize); // estimate # of partitions to create
+    this.nwayConf = nwayConf;
+    int numPartitions;
+    if (nwayConf == null) { // binary join
+      numPartitions = calcNumPartitions(memoryAvailable, estimatedTableSize, minNumParts,
+          minWbSize, nwayConf);
+      writeBufferSize = (int) (estimatedTableSize / numPartitions);
+    } else { // n-way join
+      // It has been calculated in HashTableLoader earlier, so just need to retrieve that number
+      numPartitions = nwayConf.getNumberOfPartitions();
+      if (nwayConf.getLoadedContainerList().size() == 0) { // n-way: first small table
+        writeBufferSize = (int) (estimatedTableSize / numPartitions);
+      } else { // n-way: all later small tables
+        while (memoryThreshold < numPartitions * minWbSize) {
+          // Spill previously loaded tables to make more room
+          long memFreed = nwayConf.spill();
+          if (memFreed == 0) {
+            throw new RuntimeException("Available memory is not enough to create" +
+                " HybridHashTableContainers consistently!");
+          } else {
+            LOG.info("Total available memory was: " + memoryThreshold);
+            memoryThreshold += memFreed;
+            LOG.info("Total available memory is: " + memoryThreshold);
+          }
+        }
+        writeBufferSize = (int) (memoryThreshold / numPartitions);
+      }
+    }
+    writeBufferSize = writeBufferSize < minWbSize ? minWbSize : writeBufferSize;
+    LOG.info("Write buffer size: " + writeBufferSize);
 
     hashPartitions = new HashPartition[numPartitions];
     int numPartitionsSpilledOnCreation = 0;
-    long memoryAllocated = 0;
+    memoryUsed = 0;
     int initialCapacity = Math.max(newKeyCount / numPartitions, threshold / numPartitions);
     for (int i = 0; i < numPartitions; i++) {
-      if (i == 0) { // We unconditionally create a hashmap for the first hash partition
-        hashPartitions[i] = new HashPartition(initialCapacity, loadFactor, wbSize, memUsage, true);
-      } else {
-        hashPartitions[i] = new HashPartition(initialCapacity, loadFactor, wbSize, memUsage,
-            memoryAllocated + wbSize < memoryThreshold);
+      if (this.nwayConf == null ||                          // binary join
+          nwayConf.getLoadedContainerList().size() == 0) {  // n-way join, first (biggest) small table
+        if (i == 0) { // We unconditionally create a hashmap for the first hash partition
+          hashPartitions[i] = new HashPartition(initialCapacity, loadFactor, writeBufferSize, memoryThreshold, true);
+        } else {
+          hashPartitions[i] = new HashPartition(initialCapacity, loadFactor, writeBufferSize, memoryThreshold,
+              memoryUsed + writeBufferSize < memoryThreshold);
+        }
+      } else { // n-way join
+        // For all later small tables, follow the same pattern of the previously loaded tables.
+        if (this.nwayConf.doSpillOnCreation(i)) {
+          hashPartitions[i] = new HashPartition(threshold, loadFactor, writeBufferSize, memoryThreshold, false);
+        } else {
+          hashPartitions[i] = new HashPartition(threshold, loadFactor, writeBufferSize, memoryThreshold, true);
+        }
       }
+
       if (isHashMapSpilledOnCreation(i)) {
         numPartitionsSpilledOnCreation++;
         numPartitionsSpilled++;
         this.setSpill(true);
+        if (this.nwayConf != null && this.nwayConf.getNextSpillPartition() == numPartitions - 1) {
+          this.nwayConf.setNextSpillPartition(i - 1);
+        }
       } else {
-        memoryAllocated += hashPartitions[i].hashMap.memorySize();
+        memoryUsed += hashPartitions[i].hashMap.memorySize();
       }
     }
     assert numPartitionsSpilledOnCreation != numPartitions : "All partitions are directly spilled!" +
@@ -236,6 +277,11 @@ private HybridHashTableContainer(float keyCountAdj, int threshold, float loadFac
     LOG.info("Number of partitions created: " + numPartitions);
     LOG.info("Number of partitions spilled directly to disk on creation: " +
         numPartitionsSpilledOnCreation);
+
+    // Append this container to the loaded list
+    if (this.nwayConf != null) {
+      this.nwayConf.getLoadedContainerList().add(this);
+    }
   }
 
@@ -251,6 +297,20 @@ public long getMemoryThreshold() {
     return memoryThreshold;
   }
 
+  /**
+   * Get the current memory usage by recalculating it.
+   * @return current memory usage
+   */
+  public long refreshMemoryUsed() {
+    long memUsed = 0;
+    for (HashPartition hp : hashPartitions) {
+      if (hp.hashMap != null) {
+        memUsed += hp.hashMap.memorySize();
+      }
+    }
+    return memoryUsed = memUsed;
+  }
+
   public LazyBinaryStructObjectInspector getInternalValueOi() {
     return internalValueOi;
   }
@@ -313,10 +373,16 @@ private MapJoinKey internalPutRow(KeyValueHelper keyValueHelper,
           LOG.warn("This LAST partition in memory won't be spilled!");
           lastPartitionInMem = true;
         } else {
-          int biggest = biggestPartition();
+          if (nwayConf == null) { // binary join
+            int biggest = biggestPartition();
+            spillPartition(biggest);
+            this.setSpill(true);
+          } else { // n-way join
+            LOG.info("N-way spilling: spill tail partition from previously loaded small tables");
+            memoryThreshold += nwayConf.spill();
+            LOG.info("Memory threshold has been increased to: " + memoryThreshold);
+          }
           numPartitionsSpilled++;
-          spillPartition(biggest);
-          this.setSpill(true);
         }
       }
     }
@@ -349,13 +415,7 @@ public boolean isHashMapSpilledOnCreation(int partitionId) {
    * @return true if memory is full, false if not
    */
   private boolean isMemoryFull() {
-    long size = 0;
-    for (int i = 0; i < hashPartitions.length; i++) {
-      if (!isOnDisk(i)) {
-        size += hashPartitions[i].hashMap.memorySize();
-      }
-    }
-    return size >= memoryThreshold;
+    return refreshMemoryUsed() >= memoryThreshold;
   }
 
@@ -385,11 +445,11 @@ private int biggestPartition() {
   /**
    * Move the hashtable of a specified partition from memory into local file system
    * @param partitionId the hashtable to be moved
+   * @return amount of memory freed
    */
-  private void spillPartition(int partitionId) throws IOException {
+  public long spillPartition(int partitionId) throws IOException {
     HashPartition partition = hashPartitions[partitionId];
     int inMemRowCount = partition.hashMap.getNumValues();
-    long inMemSize = partition.hashMap.memorySize();
 
     Path path = Files.createTempFile("partition-" + partitionId + "-", null);
     OutputStream outputStream = Files.newOutputStream(path);
@@ -403,57 +463,73 @@ private void spillPartition(int partitionId) throws IOException {
     partition.hashMapLocalPath = path;
     partition.hashMapOnDisk = true;
 
-    long size = 0;
-    for (int i = 0; i < hashPartitions.length; i++) {
-      if (!isOnDisk(i)) {
-        size += hashPartitions[i].hashMap.memorySize();
-      }
-    }
     LOG.info("Spilling hash partition " + partitionId + " (Rows: " + inMemRowCount +
-        ", Mem size: " + inMemSize + "): " + path);
-    LOG.info("Memory usage before spilling: " + size);
-    LOG.info("Memory usage after spilling: " + (size - inMemSize));
+        ", Mem size: " + partition.hashMap.memorySize() + "): " + path);
+    LOG.info("Memory usage before spilling: " + memoryUsed);
+
+    long memFreed = partition.hashMap.memorySize();
+    memoryUsed -= memFreed;
+    LOG.info("Memory usage after spilling: " + memoryUsed);
 
     totalInMemRowCount -= inMemRowCount;
     partition.hashMap.clear();
+    return memFreed;
   }
 
   /**
-   * Calculate how many partitions are needed. This is an estimation.
+   * Calculate how many partitions are needed.
+   * For n-way join, we only do this calculation once in the HashTableLoader, for the biggest small
+   * table. Other small tables will use the same number. They may need to adjust (usually reduce)
+   * their individual write buffer size in order not to exceed memory threshold.
+   * @param memoryThreshold memory threshold for the given table
    * @param dataSize total data size for the table
-   * @param wbSize write buffer size
+   * @param minNumParts minimum required number of partitions
+   * @param minWbSize minimum required write buffer size
+   * @param nwayConf the n-way join configuration
    * @return number of partitions needed
   */
-  private int calcNumPartitions(long dataSize, int wbSize) {
-    if (memoryThreshold < wbSize) {
-      throw new IllegalStateException("Available memory is less than hashtable writebuffer size!"
-          + " Try increasing hive.auto.convert.join.noconditionaltask.size.");
+  public static int calcNumPartitions(long memoryThreshold, long dataSize, int minNumParts,
+      int minWbSize, HybridHashTableConf nwayConf) throws IOException {
+    int numPartitions = minNumParts;
+
+    if (nwayConf == null ||                        // binary join
+        nwayConf.getNumberOfPartitions() == 0) {   // n-way join, the first small table
+      if (memoryThreshold < minNumParts * minWbSize) {
+        LOG.warn("Available memory is not enough to create a HybridHashTableContainer!");
+      }
+      if (memoryThreshold < dataSize) {
+        while (dataSize / numPartitions > memoryThreshold) {
+          numPartitions *= 2;
+        }
+      }
+    } else { // Just reuse the same number as calculated before for the first small table
+      numPartitions = nwayConf.getNumberOfPartitions();
+      LOG.info("Original total available memory: " + memoryThreshold);
+      while (memoryThreshold < numPartitions * minWbSize) {
+        // Spill previously loaded tables to make more room
+        long memFreed = nwayConf.spill();
+        if (memFreed == 0) {
+          throw new RuntimeException("Available memory is not enough to create" +
+              " HybridHashTableContainers consistently!");
+        } else {
+          LOG.info("Total available memory was: " + memoryThreshold);
+          memoryThreshold += memFreed;
+          LOG.info("Total available memory is: " + memoryThreshold);
+        }
+      }
     }
 
-    int lowerLimit = 2;
-    int numPartitions = (int) Math.ceil(dataSize / wbSize);
-
-    LOG.info("Total available memory: " + memoryThreshold);
     LOG.info("Estimated small table size: " + dataSize);
-    LOG.info("Write buffer size: " + wbSize);
-    LOG.info("Initial number of partitions: " + numPartitions);
-
-    if (numPartitions < lowerLimit) {
-      return lowerLimit;
-    } else if (dataSize > memoryThreshold) {
-      numPartitions = (int) (memoryThreshold / wbSize);
-    }
-    // Make sure numPartitions is power of 2, to make N & (M - 1) easy when calculating partition No.
-    numPartitions = (Long.bitCount(numPartitions) == 1) ? numPartitions
-        : Integer.highestOneBit(numPartitions) << 1;
-    while (dataSize / numPartitions > memoryThreshold) {
-      numPartitions *= 2;
-    }
-
     LOG.info("Number of hash partitions to be created: " + numPartitions);
     return numPartitions;
   }
 
+  /* Get number of partitions */
+  public int getNumPartitions() {
+    return hashPartitions.length;
+  }
+
   /* Get total number of rows from all in memory partitions */
   public int getTotalInMemRowCount() {
     return totalInMemRowCount;
@@ -494,6 +570,7 @@ public void clear() {
         hp.hashMap.clear();
       }
     }
+    memoryUsed = 0;
   }
 
   @Override
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HashTableLoader.java ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HashTableLoader.java
index fe108c4..043f1f7 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HashTableLoader.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HashTableLoader.java
@@ -69,7 +69,7 @@ public void init(ExecMapperContext context, MapredContext mrContext, Configurati
 
   @Override
   public void load(MapJoinTableContainer[] mapJoinTables,
-      MapJoinTableContainerSerDe[] mapJoinTableSerdes, long memUsage)
+      MapJoinTableContainerSerDe[] mapJoinTableSerdes)
       throws HiveException {
 
     // Note: it's possible that a MJ operator is in a ReduceWork, in which case the
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java
index ba5a797..0cf36d4 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java
@@ -29,6 +29,7 @@
 import org.apache.hadoop.hive.ql.exec.MapredContext;
 import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext;
 import org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper;
+import org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableConf;
 import org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer;
 import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer;
 import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext;
@@ -69,7 +70,7 @@ public void init(ExecMapperContext context, MapredContext mrContext, Configurati
 
   @Override
   public void load(MapJoinTableContainer[] mapJoinTables,
-      MapJoinTableContainerSerDe[] mapJoinTableSerdes, long memUsage)
+      MapJoinTableContainerSerDe[] mapJoinTableSerdes)
       throws HiveException {
 
     Map<Integer, String> parentToInput = desc.getParentToInput();
@@ -79,10 +80,43 @@ public void load(MapJoinTableContainer[] mapJoinTables,
         hconf, HiveConf.ConfVars.HIVEMAPJOINUSEOPTIMIZEDTABLE);
     boolean useHybridGraceHashJoin = desc.isHybridHashJoin();
     boolean isFirstKey = true;
+    // TODO remove this after memory manager is in
+    long noConditionalTaskThreshold = HiveConf.getLongVar(
+        hconf, HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD);
+
+    // Only applicable to n-way Hybrid Grace Hash Join
+    HybridHashTableConf nwayConf = null;
+    long totalSize = 0;
+    int biggest = 0;  // position of the biggest small table
+    if (useHybridGraceHashJoin && mapJoinTables.length > 2) {
+      // Create a Conf for n-way HybridHashTableContainers
+      nwayConf = new HybridHashTableConf();
+
+      // Find the biggest small table; also calculate total data size of all small tables
+      long maxSize = 0; // the size of the biggest small table
+      for (int pos = 0; pos < mapJoinTables.length; pos++) {
+        if (pos == desc.getPosBigTable()) {
+          continue;
+        }
+        totalSize += desc.getParentDataSizes().get(pos);
+        biggest = desc.getParentDataSizes().get(pos) > maxSize ? pos : biggest;
+        maxSize = desc.getParentDataSizes().get(pos) > maxSize ? desc.getParentDataSizes().get(pos)
+            : maxSize;
+      }
 
-    // Disable hybrid grace hash join for n-way join
-    if (mapJoinTables.length > 2) {
-      useHybridGraceHashJoin = false;
+      // Using biggest small table, calculate number of partitions to create for each small table
+      float percentage = (float) desc.getParentDataSizes().get(biggest) / totalSize;
+      long memory = (long) (noConditionalTaskThreshold * percentage);
+      int numPartitions = 0;
+      try {
+        numPartitions = HybridHashTableContainer.calcNumPartitions(memory,
+            desc.getParentDataSizes().get(biggest),
+            HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMINNUMPARTITIONS),
+            HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMINWBSIZE),
+            nwayConf);
+      } catch (IOException e) {
+        throw new HiveException(e);
+      }
+      nwayConf.setNumberOfPartitions(numPartitions);
     }
 
     for (int pos = 0; pos < mapJoinTables.length; pos++) {
@@ -122,10 +156,21 @@ public void load(MapJoinTableContainer[] mapJoinTables,
         Long keyCountObj = parentKeyCounts.get(pos);
         long keyCount = (keyCountObj == null) ? -1 : keyCountObj.longValue();
 
+        long memory = 0;
+        if (useHybridGraceHashJoin) {
+          if (mapJoinTables.length > 2) {
+            // Allocate n-way join memory proportionally
+            float percentage = (float) desc.getParentDataSizes().get(pos) / totalSize;
+            memory = (long) (noConditionalTaskThreshold * percentage);
+          } else { // binary join
+            memory = noConditionalTaskThreshold;
+          }
+        }
+
         MapJoinTableContainer tableContainer = useOptimizedTables
-            ? (useHybridGraceHashJoin ? new HybridHashTableContainer(hconf, keyCount, memUsage,
-                                            desc.getParentDataSizes().get(pos))
-                                      : new MapJoinBytesTableContainer(hconf, valCtx, keyCount, memUsage))
+            ? (useHybridGraceHashJoin ? new HybridHashTableContainer(hconf, keyCount,
+                                            memory, desc.getParentDataSizes().get(pos), nwayConf)
+                                      : new MapJoinBytesTableContainer(hconf, valCtx, keyCount, 0))
            : new HashMapWrapper(hconf, keyCount);
 
         LOG.info("Using tableContainer " + tableContainer.getClass().getSimpleName());
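The Tez loader now splits the noconditionaltask threshold across the small tables in proportion to their estimated data sizes. Worked through with illustrative numbers:

```java
// Proportional n-way memory split, as in the load() method above.
long noConditionalTaskThreshold = 100L << 20;               // 100 MB (example value)
long[] smallTableSizes = {60L << 20, 30L << 20, 10L << 20}; // hypothetical estimates
long totalSize = 100L << 20;
for (long size : smallTableSizes) {
  float percentage = (float) size / totalSize;
  long memory = (long) (noConditionalTaskThreshold * percentage);
  // -> budgets of 60 MB, 30 MB and 10 MB respectively
}
```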
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java
index 534a906..0547346 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java
@@ -311,8 +311,10 @@ protected void spillBigTableRow(MapJoinTableContainer hybridHtContainer, Object
   }
 
   @Override
-  protected void reProcessBigTable(HybridHashTableContainer.HashPartition partition)
+  protected void reProcessBigTable(int partitionId)
       throws HiveException {
+
+    HybridHashTableContainer.HashPartition partition = firstSmallTable.getHashPartitions()[partitionId];
     ObjectContainer bigTable = partition.getMatchfileObjContainer();
 
     DataOutputBuffer dataOutputBuffer = new DataOutputBuffer();
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
index f272b6d..f9d5736 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
@@ -744,10 +744,6 @@ protected void commonSetup(VectorizedRowBatch batch) throws HiveException {
     // Setup a scratch batch that will be used to play back big table rows that were spilled
     // to disk for the Hybrid Grace hash partitioning.
     spillReplayBatch = VectorizedBatchUtil.makeLike(batch);
-
-    // TEMPORARY -- Set this up for Hybrid Grace logic in MapJoinOperator.closeOp
-    hashMapRowGetters = new ReusableGetAdaptor[mapJoinTables.length];
-    smallTable = posSingleVectorMapJoinSmallTable;
   }
 
   protected void displayBatchColumns(VectorizedRowBatch batch, String batchName) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java
index 743a975..70c8cb1 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java
@@ -27,6 +27,8 @@
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer;
 import org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer.HashPartition;
+import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer;
+import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer;
 import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorDeserializeRow;
 import org.apache.hadoop.hive.ql.exec.vector.VectorSerializeRow;
@@ -449,7 +451,7 @@ private void spillSerializeRow(VectorizedRowBatch batch, int batchIndex,
 
     int partitionId = hashTableResult.spillPartitionId();
 
-    HybridHashTableContainer ht = (HybridHashTableContainer) mapJoinTables[smallTable];
+    HybridHashTableContainer ht = (HybridHashTableContainer) mapJoinTables[posSingleVectorMapJoinSmallTable];
     HashPartition hp = ht.getHashPartitions()[partitionId];
 
     VectorMapJoinRowBytesContainer rowBytesContainer = hp.getMatchfileRowBytesContainer();
@@ -499,27 +501,30 @@ protected void spillBatchRepeated(VectorizedRowBatch batch,
   }
 
   @Override
-  protected void reloadHashTable(HashPartition partition,
-      HybridHashTableContainer hybridHtContainer)
-      throws IOException, ClassNotFoundException, HiveException, SerDeException {
+  protected void reloadHashTable(byte pos, int partitionId)
+      throws IOException, HiveException, SerDeException, ClassNotFoundException {
 
-    // The super method will reload a hash table partition and
-    // put a single MapJoinBytesTableContainer into the currentSmallTable member.
-    super.reloadHashTable(partition, hybridHtContainer);
+    // The super method will reload a hash table partition of one of the small tables.
+    // Currently, for native vector map join it will only be one small table.
+    super.reloadHashTable(pos, partitionId);
+
+    MapJoinTableContainer smallTable = spilledMapJoinTables[pos];
 
     vectorMapJoinHashTable = VectorMapJoinOptimizedCreateHashTable.createHashTable(conf,
-        currentSmallTable);
+        smallTable);
     needHashTableSetup = true;
 
     LOG.info(CLASS_NAME + " reloadHashTable!");
   }
 
   @Override
-  protected void reProcessBigTable(HybridHashTableContainer.HashPartition partition)
-      throws HiveException, IOException {
+  protected void reProcessBigTable(int partitionId)
+      throws HiveException {
 
     LOG.info(CLASS_NAME + " reProcessBigTable enter...");
 
+    HashPartition partition = firstSmallTable.getHashPartitions()[partitionId];
+
     int rowCount = 0;
     int batchCount = 0;
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java
index 0796406..f9550c9 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java
@@ -90,8 +90,8 @@ public void assignSlot(int slot, byte[] keyBytes, int keyStart, int keyLength,
   }
 
   public VectorMapJoinFastBytesHashMap(
-      int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) {
-    super(initialCapacity, loadFactor, writeBuffersSize, memUsage);
+      int initialCapacity, float loadFactor, int writeBuffersSize) {
+    super(initialCapacity, loadFactor, writeBuffersSize);
 
     valueStore = new VectorMapJoinFastValueStore(writeBuffersSize);
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java
index d685c22..9dcaf8f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java
@@ -85,8 +85,8 @@ public void assignSlot(int slot, byte[] keyBytes, int keyStart, int keyLength,
   }
 
   public VectorMapJoinFastBytesHashMultiSet(
-      int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) {
-    super(initialCapacity, loadFactor, writeBuffersSize, memUsage);
+      int initialCapacity, float loadFactor, int writeBuffersSize) {
+    super(initialCapacity, loadFactor, writeBuffersSize);
 
     keyStore = new VectorMapJoinFastKeyStore(writeBuffersSize);
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java
index 9f20fdc..9f122c4 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java
@@ -77,8 +77,8 @@ public void assignSlot(int slot, byte[] keyBytes, int keyStart, int keyLength,
   }
 
   public VectorMapJoinFastBytesHashSet(
-      int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) {
-    super(initialCapacity, loadFactor, writeBuffersSize, memUsage);
+      int initialCapacity, float loadFactor, int writeBuffersSize) {
+    super(initialCapacity, loadFactor, writeBuffersSize);
 
     keyStore = new VectorMapJoinFastKeyStore(writeBuffersSize);
   }
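The remaining hunks repeat one mechanical change across the whole VectorMapJoinFast* hierarchy: the unused memUsage constructor argument is dropped. A before/after call-site sketch (capacity, load factor and buffer-size values are illustrative):

```java
// Old: a trailing memUsage argument (often just -1) was threaded through:
// new VectorMapJoinFastMultiKeyHashMap(false, 1024, 0.75f, 8 << 20, -1);

// New: sizing is controlled by writeBuffersSize alone.
VectorMapJoinFastMultiKeyHashMap map =
    new VectorMapJoinFastMultiKeyHashMap(false, 1024, 0.75f, 8 << 20);
```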
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java index 594a77f..b6e6321 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java @@ -214,8 +214,8 @@ private void allocateBucketArray() { } public VectorMapJoinFastBytesHashTable( - int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) { - super(initialCapacity, loadFactor, writeBuffersSize, memUsage); + int initialCapacity, float loadFactor, int writeBuffersSize) { + super(initialCapacity, loadFactor, writeBuffersSize); allocateBucketArray(); } } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashMap.java index b37247c..262b619 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashMap.java @@ -32,7 +32,7 @@ public VectorMapJoinHashMapResult createHashMapResult() { public VectorMapJoinFastHashMap( boolean isOuterJoin, - int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) { - super(initialCapacity, loadFactor, writeBuffersSize, memUsage); + int initialCapacity, float loadFactor, int writeBuffersSize) { + super(initialCapacity, loadFactor, writeBuffersSize); } } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashMultiSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashMultiSet.java index 5569f6e..5f7c6a7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashMultiSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashMultiSet.java @@ -42,7 +42,7 @@ public void set(long count) { public VectorMapJoinFastHashMultiSet( boolean isOuterJoin, - int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) { - super(initialCapacity, loadFactor, writeBuffersSize, memUsage); + int initialCapacity, float loadFactor, int writeBuffersSize) { + super(initialCapacity, loadFactor, writeBuffersSize); } } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashSet.java index 0738df3..8509971 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashSet.java @@ -38,7 +38,7 @@ public VectorMapJoinHashSetResult createHashSetResult() { public VectorMapJoinFastHashSet( boolean isOuterJoin, - int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) { - super(initialCapacity, loadFactor, writeBuffersSize, memUsage); + int initialCapacity, float loadFactor, int writeBuffersSize) { + super(initialCapacity, loadFactor, writeBuffersSize); } } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java index 33e34fa..fbe6b4c 100644 --- 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java @@ -30,7 +30,6 @@ protected float loadFactor; protected int writeBuffersSize; - protected long memUsage; protected int metricPutConflict; protected int largestNumberOfSteps; @@ -52,7 +51,7 @@ private static int nextHighestPowerOfTwo(int v) { } public VectorMapJoinFastHashTable( - int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) { + int initialCapacity, float loadFactor, int writeBuffersSize) { initialCapacity = (Long.bitCount(initialCapacity) == 1) ? initialCapacity : nextHighestPowerOfTwo(initialCapacity); @@ -65,6 +64,5 @@ public VectorMapJoinFastHashTable( this.loadFactor = loadFactor; this.writeBuffersSize = writeBuffersSize; - this.memUsage = memUsage; } } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTableLoader.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTableLoader.java index 92b5d40..4edf604 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTableLoader.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTableLoader.java @@ -62,7 +62,7 @@ public void init(ExecMapperContext context, MapredContext mrContext, @Override public void load(MapJoinTableContainer[] mapJoinTables, - MapJoinTableContainerSerDe[] mapJoinTableSerdes, long memUsage) + MapJoinTableContainerSerDe[] mapJoinTableSerdes) throws HiveException { Map parentToInput = desc.getParentToInput(); @@ -91,7 +91,7 @@ public void load(MapJoinTableContainer[] mapJoinTables, long keyCount = (keyCountObj == null) ? 
-1 : keyCountObj.longValue(); VectorMapJoinFastTableContainer vectorMapJoinFastTableContainer = - new VectorMapJoinFastTableContainer(desc, hconf, keyCount, memUsage); + new VectorMapJoinFastTableContainer(desc, hconf, keyCount); while (kvReader.next()) { vectorMapJoinFastTableContainer.putRow( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java index 3a0b380..d6ad028 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java @@ -86,9 +86,9 @@ public void assignSlot(int slot, long key, boolean isNewKey, BytesWritable curre public VectorMapJoinFastLongHashMap( boolean minMaxEnabled, boolean isOuterJoin, HashTableKeyType hashTableKeyType, - int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) { + int initialCapacity, float loadFactor, int writeBuffersSize) { super(minMaxEnabled, isOuterJoin, hashTableKeyType, - initialCapacity, loadFactor, writeBuffersSize, memUsage); + initialCapacity, loadFactor, writeBuffersSize); valueStore = new VectorMapJoinFastValueStore(writeBuffersSize); } } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java index f9763e3..e447551 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java @@ -84,8 +84,8 @@ public void assignSlot(int slot, long key, boolean isNewKey, BytesWritable curre public VectorMapJoinFastLongHashMultiSet( boolean minMaxEnabled, boolean isOuterJoin, HashTableKeyType hashTableKeyType, - int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) { + int initialCapacity, float loadFactor, int writeBuffersSize) { super(minMaxEnabled, isOuterJoin, hashTableKeyType, - initialCapacity, loadFactor, writeBuffersSize, memUsage); + initialCapacity, loadFactor, writeBuffersSize); } } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java index cd23949..aa44e60 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java @@ -77,8 +77,8 @@ public JoinResult contains(long key, VectorMapJoinHashSetResult hashSetResult) { public VectorMapJoinFastLongHashSet( boolean minMaxEnabled, boolean isOuterJoin, HashTableKeyType hashTableKeyType, - int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) { + int initialCapacity, float loadFactor, int writeBuffersSize) { super(minMaxEnabled, isOuterJoin, hashTableKeyType, - initialCapacity, loadFactor, writeBuffersSize, memUsage); + initialCapacity, loadFactor, writeBuffersSize); } } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java index b448e1f..2137fb7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java @@ -270,8 +270,8 @@ private void allocateBucketArray() { public VectorMapJoinFastLongHashTable( boolean minMaxEnabled, boolean isOuterJoin, HashTableKeyType hashTableKeyType, - int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) { - super(initialCapacity, loadFactor, writeBuffersSize, memUsage); + int initialCapacity, float loadFactor, int writeBuffersSize) { + super(initialCapacity, loadFactor, writeBuffersSize); this.isOuterJoin = isOuterJoin; this.hashTableKeyType = hashTableKeyType; PrimitiveTypeInfo[] primitiveTypeInfos = { TypeInfoFactory.longTypeInfo }; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java index b962475..9a9fb8d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java @@ -28,12 +28,12 @@ @VisibleForTesting public VectorMapJoinFastMultiKeyHashMap(int initialCapacity, float loadFactor, int wbSize) { - this(false, initialCapacity, loadFactor, wbSize, -1); + this(false, initialCapacity, loadFactor, wbSize); } public VectorMapJoinFastMultiKeyHashMap( boolean isOuterJoin, - int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) { - super(initialCapacity, loadFactor, writeBuffersSize, memUsage); + int initialCapacity, float loadFactor, int writeBuffersSize) { + super(initialCapacity, loadFactor, writeBuffersSize); } } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java index 71a62fe..a8744a5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java @@ -26,7 +26,7 @@ public VectorMapJoinFastMultiKeyHashMultiSet( boolean isOuterJoin, - int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) { - super(initialCapacity, loadFactor, writeBuffersSize, memUsage); + int initialCapacity, float loadFactor, int writeBuffersSize) { + super(initialCapacity, loadFactor, writeBuffersSize); } } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java index dad3b32..a8048e5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java @@ -26,7 +26,7 @@ public VectorMapJoinFastMultiKeyHashSet( boolean isOuterJoin, - int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) { - super(initialCapacity, loadFactor, writeBuffersSize, memUsage); + int initialCapacity, float 
loadFactor, int writeBuffersSize) { + super(initialCapacity, loadFactor, writeBuffersSize); } } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java index c80ea89..6f181b2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java @@ -37,8 +37,8 @@ public void putRow(BytesWritable currentKey, BytesWritable currentValue) throws public VectorMapJoinFastStringHashMap( boolean isOuterJoin, - int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) { - super(initialCapacity, loadFactor, writeBuffersSize, memUsage); + int initialCapacity, float loadFactor, int writeBuffersSize) { + super(initialCapacity, loadFactor, writeBuffersSize); stringCommon = new VectorMapJoinFastStringCommon(isOuterJoin); } } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java index 4933b16..9653b71 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java @@ -37,8 +37,8 @@ public void putRow(BytesWritable currentKey, BytesWritable currentValue) throws public VectorMapJoinFastStringHashMultiSet( boolean isOuterJoin, - int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) { - super(initialCapacity, loadFactor, writeBuffersSize, memUsage); + int initialCapacity, float loadFactor, int writeBuffersSize) { + super(initialCapacity, loadFactor, writeBuffersSize); stringCommon = new VectorMapJoinFastStringCommon(isOuterJoin); } } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java index ae8d943..6419a0b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java @@ -37,8 +37,8 @@ public void putRow(BytesWritable currentKey, BytesWritable currentValue) throws public VectorMapJoinFastStringHashSet( boolean isOuterJoin, - int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) { - super(initialCapacity, loadFactor, writeBuffersSize, memUsage); + int initialCapacity, float loadFactor, int writeBuffersSize) { + super(initialCapacity, loadFactor, writeBuffersSize); stringCommon = new VectorMapJoinFastStringCommon(isOuterJoin); } } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java index 3789275..373b5f4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java @@ -56,13 +56,12 @@ private float loadFactor; private int 
wbSize; private long keyCount; - private long memUsage; private VectorMapJoinFastHashTable VectorMapJoinFastHashTable; public VectorMapJoinFastTableContainer(MapJoinDesc desc, Configuration hconf, - long keyCount, long memUsage) throws SerDeException { + long keyCount) throws SerDeException { this.desc = desc; this.hconf = hconf; @@ -73,13 +72,11 @@ public VectorMapJoinFastTableContainer(MapJoinDesc desc, Configuration hconf, wbSize = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHASHTABLEWBSIZE); this.keyCount = keyCount; - this.memUsage = memUsage; // LOG.info("VectorMapJoinFastTableContainer load keyCountAdj " + keyCountAdj); // LOG.info("VectorMapJoinFastTableContainer load threshold " + threshold); // LOG.info("VectorMapJoinFastTableContainer load loadFactor " + loadFactor); // LOG.info("VectorMapJoinFastTableContainer load wbSize " + wbSize); - // LOG.info("VectorMapJoinFastTableContainer load memUsage " + memUsage); int newThreshold = HashMapWrapper.calculateTableSize( keyCountAdj, threshold, loadFactor, keyCount); @@ -117,17 +114,17 @@ private VectorMapJoinFastHashTable createHashTable(int newThreshold) { case HASH_MAP: hashTable = new VectorMapJoinFastLongHashMap( minMaxEnabled, isOuterJoin, hashTableKeyType, - newThreshold, loadFactor, writeBufferSize, memUsage); + newThreshold, loadFactor, writeBufferSize); break; case HASH_MULTISET: hashTable = new VectorMapJoinFastLongHashMultiSet( minMaxEnabled, isOuterJoin, hashTableKeyType, - newThreshold, loadFactor, writeBufferSize, memUsage); + newThreshold, loadFactor, writeBufferSize); break; case HASH_SET: hashTable = new VectorMapJoinFastLongHashSet( minMaxEnabled, isOuterJoin, hashTableKeyType, - newThreshold, loadFactor, writeBufferSize, memUsage); + newThreshold, loadFactor, writeBufferSize); break; } break; @@ -137,17 +134,17 @@ private VectorMapJoinFastHashTable createHashTable(int newThreshold) { case HASH_MAP: hashTable = new VectorMapJoinFastStringHashMap( isOuterJoin, - newThreshold, loadFactor, writeBufferSize, memUsage); + newThreshold, loadFactor, writeBufferSize); break; case HASH_MULTISET: hashTable = new VectorMapJoinFastStringHashMultiSet( isOuterJoin, - newThreshold, loadFactor, writeBufferSize, memUsage); + newThreshold, loadFactor, writeBufferSize); break; case HASH_SET: hashTable = new VectorMapJoinFastStringHashSet( isOuterJoin, - newThreshold, loadFactor, writeBufferSize, memUsage); + newThreshold, loadFactor, writeBufferSize); break; } break; @@ -157,17 +154,17 @@ private VectorMapJoinFastHashTable createHashTable(int newThreshold) { case HASH_MAP: hashTable = new VectorMapJoinFastMultiKeyHashMap( isOuterJoin, - newThreshold, loadFactor, writeBufferSize, memUsage); + newThreshold, loadFactor, writeBufferSize); break; case HASH_MULTISET: hashTable = new VectorMapJoinFastMultiKeyHashMultiSet( isOuterJoin, - newThreshold, loadFactor, writeBufferSize, memUsage); + newThreshold, loadFactor, writeBufferSize); break; case HASH_SET: hashTable = new VectorMapJoinFastMultiKeyHashSet( isOuterJoin, - newThreshold, loadFactor, writeBufferSize, memUsage); + newThreshold, loadFactor, writeBufferSize); break; } break; diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java index 0192fb5..cee9100 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java @@ -67,7 +67,7 @@ private boolean isBucketMapJoin; // Hash table memory usage allowed; used in case of non-staged mapjoin. 
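// A minimal sketch for intuition (generic Java, not code from this patch): a
// fraction in [0, 1] such as hashtableMemoryUsage is applied to the JVM heap to
// obtain an absolute byte budget for the hash table, e.g.
//   long budget = (long) (Runtime.getRuntime().maxMemory() * hashtableMemoryUsage);
// so a value of 0.9f would allow the table to use up to ~90% of the maximum heap.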
- private float hashtableMemoryUsage; + private float hashtableMemoryUsage; // This is a fraction between 0 and 1, not a percentage protected boolean genJoinKeys = true; private boolean isHybridHashJoin; diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMap.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMap.java index eb38b19..a45275b 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMap.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMap.java @@ -35,7 +35,7 @@ public void testPutGetOne() throws Exception { random = new Random(47496); VectorMapJoinFastLongHashMap map = - new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE, 0); + new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE); RandomLongStream randomLongKeyStream = new RandomLongStream(random); RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random); @@ -55,7 +55,7 @@ public void testPutGetOne() throws Exception { public void testPutGetMultiple() throws Exception { random = new Random(2990); - VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE, 0); + VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE); RandomLongStream randomLongKeyStream = new RandomLongStream(random); RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random); @@ -77,7 +77,7 @@ public void testPutGetMultiple() throws Exception { public void testGetNonExistent() throws Exception { random = new Random(16916); - VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE, 0); + VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE); RandomLongStream randomLongKeyStream = new RandomLongStream(random); RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random); @@ -101,7 +101,7 @@ public void testPutWithFullMap() throws Exception { random = new Random(26078); // Make sure the map does not expand; should be able to find space. - VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, CAPACITY, 1f, WB_SIZE, 0); + VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, CAPACITY, 1f, WB_SIZE); RandomLongStream randomLongKeyStream = new RandomLongStream(random); RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random); @@ -126,7 +126,7 @@ public void testExpand() throws Exception { random = new Random(22470); // Start with capacity 1; make sure we expand on every put.
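// A sketch of why this forces growth, assuming the usual rule for hash tables
// (expand once keyCount >= capacity * loadFactor): with capacity 1 and
// loadFactor 0.0000001f the threshold is crossed by the very first insert, so
// each put exercises the expand-and-rehash path.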
- VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, 1, 0.0000001f, WB_SIZE, 0); + VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, 1, 0.0000001f, WB_SIZE); RandomLongStream randomLongKeyStream = new RandomLongStream(random); RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random); @@ -147,7 +147,7 @@ public void testLarge() throws Exception { random = new Random(40719); // Use a large capacity that doesn't require expansion, yet. - VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE, 0); + VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE); RandomLongStream randomLongKeyStream = new RandomLongStream(random); @@ -172,7 +172,7 @@ public void testLargeAndExpand() throws Exception { random = new Random(46809); // Use a large capacity that doesn't require expansion, yet. - VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE, 0); + VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE); RandomLongStream randomLongKeyStream = new RandomLongStream(random); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastMultiKeyHashMap.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastMultiKeyHashMap.java index 3c1b29a..944bda6 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastMultiKeyHashMap.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastMultiKeyHashMap.java @@ -35,7 +35,7 @@ public void testPutGetOne() throws Exception { random = new Random(47496); VectorMapJoinFastMultiKeyHashMap map = - new VectorMapJoinFastMultiKeyHashMap(false, CAPACITY, LOAD_FACTOR, WB_SIZE, 0); + new VectorMapJoinFastMultiKeyHashMap(false, CAPACITY, LOAD_FACTOR, WB_SIZE); RandomByteArrayStream randomByteArrayKeyStream = new RandomByteArrayStream(random); RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random); @@ -55,7 +55,7 @@ public void testPutGetOne() throws Exception { public void testPutGetMultiple() throws Exception { random = new Random(2990); - VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, CAPACITY, LOAD_FACTOR, WB_SIZE, 0); + VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, CAPACITY, LOAD_FACTOR, WB_SIZE); RandomByteArrayStream randomByteArrayKeyStream = new RandomByteArrayStream(random); RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random); @@ -77,7 +77,7 @@ public void testPutGetMultiple() throws Exception { public void testGetNonExistent() throws Exception { random = new Random(16916); - VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, CAPACITY, LOAD_FACTOR, WB_SIZE, 0); + VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, CAPACITY, LOAD_FACTOR, WB_SIZE); RandomByteArrayStream randomByteArrayKeyStream = new RandomByteArrayStream(random); RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random); @@ -101,7 
+101,7 @@ public void testPutWithFullMap() throws Exception { random = new Random(26078); // Make sure the map does not expand; should be able to find space. - VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, CAPACITY, 1f, WB_SIZE, 0); + VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, CAPACITY, 1f, WB_SIZE); RandomByteArrayStream randomByteArrayKeyStream = new RandomByteArrayStream(random); RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random); @@ -126,7 +126,7 @@ public void testExpand() throws Exception { random = new Random(22470); // Start with capacity 1; make sure we expand on every put. - VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, 1, 0.0000001f, WB_SIZE, 0); + VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, 1, 0.0000001f, WB_SIZE); RandomByteArrayStream randomByteArrayKeyStream = new RandomByteArrayStream(random); RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random); @@ -147,7 +147,7 @@ public void testLarge() throws Exception { random = new Random(5231); // Use a large capacity that doesn't require expansion, yet. - VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE, 0); + VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE); RandomByteArrayStream randomByteArrayKeyStream = new RandomByteArrayStream(random, 10); @@ -178,7 +178,7 @@ public void testLargeAndExpand() throws Exception { random = new Random(46809); // Use a large capacity that doesn't require expansion, yet. - VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE, 0); + VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE); RandomByteArrayStream randomByteArrayKeyStream = new RandomByteArrayStream(random, 10); diff --git ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q index 096c890..e92504a 100644 --- ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q +++ ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q @@ -42,6 +42,8 @@ select * from dest2; set hive.auto.convert.join.noconditionaltask=true; set hive.auto.convert.join.noconditionaltask.size=200; +set hive.mapjoin.hybridgrace.minwbsize=100; +set hive.mapjoin.hybridgrace.minnumpartitions=2; -- An SMB join followed by a multi-insert explain diff --git ql/src/test/queries/clientpositive/hybridgrace_hashjoin_1.q ql/src/test/queries/clientpositive/hybridgrace_hashjoin_1.q new file mode 100644 index 0000000..e4f0f85 --- /dev/null +++ ql/src/test/queries/clientpositive/hybridgrace_hashjoin_1.q @@ -0,0 +1,258 @@ +-- Hybrid Grace Hash Join +-- Test basic functionalities: +-- 1. Various cases when hash partitions spill +-- 2. Partitioned table spilling +-- 3.
Vectorization + +SHOW DATABASES; + +set hive.auto.convert.join=true; +set hive.auto.convert.join.noconditionaltask.size=1300000; +set hive.mapjoin.optimized.hashtable.wbsize=880000; +set hive.mapjoin.hybridgrace.memcheckfrequency=1024; + +set hive.mapjoin.hybridgrace.hashtable=false; + +-- Base result for inner join +explain +select count(*) from +(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint + where c.cint < 2000000000) t1 +; + +select count(*) from +(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint + where c.cint < 2000000000) t1 +; + +set hive.mapjoin.hybridgrace.hashtable=true; + +-- Two partitions are created. One in memory, one on disk on creation. +-- The one in memory will eventually exceed memory limit, but won't spill. +explain +select count(*) from +(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint + where c.cint < 2000000000) t1 +; + +select count(*) from +(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint + where c.cint < 2000000000) t1 +; + +set hive.auto.convert.join.noconditionaltask.size=3000000; +set hive.mapjoin.optimized.hashtable.wbsize=100000; + +set hive.mapjoin.hybridgrace.hashtable=false; + +-- Base result for inner join +explain +select count(*) from +(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint) t1 +; + +select count(*) from +(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint) t1 +; + +set hive.mapjoin.hybridgrace.hashtable=true; + +-- 16 partitions are created: 3 in memory, 13 on disk on creation. +-- 1 partition is spilled during first round processing, which ends up having 2 in memory, 14 on disk +explain +select count(*) from +(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint) t1 +; + +select count(*) from +(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint) t1 +; + + + +set hive.mapjoin.hybridgrace.hashtable=false; + +-- Base result for outer join +explain +select count(*) from +(select c.ctinyint + from alltypesorc c + left outer join alltypesorc cd + on cd.cint = c.cint) t1 +; + +select count(*) from +(select c.ctinyint + from alltypesorc c + left outer join alltypesorc cd + on cd.cint = c.cint) t1 +; + +set hive.mapjoin.hybridgrace.hashtable=true; + +-- 32 partitions are created. 3 in memory, 29 on disk on creation. 
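+-- Rough arithmetic behind the partition counts quoted in these comments, assuming
+-- the hybrid container picks roughly the next power of two of
+-- (estimated build-side data size / hive.mapjoin.optimized.hashtable.wbsize):
+--   filtered inner join:    880654 / 880000 ~=  1.0 ->  2 partitions
+--   unfiltered inner join: 1320982 / 100000 ~= 13.2 -> 16 partitions
+--   this outer join:       2641964 / 100000 ~= 26.4 -> 32 partitions
+-- (data sizes as estimated in the explain output). The outer join keeps NULL keys,
+-- since there is no "cint is not null" filter, so its build side is about twice
+-- the inner join's and needs twice as many partitions.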
+explain +select count(*) from +(select c.ctinyint + from alltypesorc c + left outer join alltypesorc cd + on cd.cint = c.cint) t1 +; + +select count(*) from +(select c.ctinyint + from alltypesorc c + left outer join alltypesorc cd + on cd.cint = c.cint) t1 +; + + +-- Partitioned table +create table parttbl (key string, value char(20)) partitioned by (dt char(10)); +insert overwrite table parttbl partition(dt='2000-01-01') + select * from src; +insert overwrite table parttbl partition(dt='2000-01-02') + select * from src1; + +set hive.auto.convert.join.noconditionaltask.size=30000000; +set hive.mapjoin.optimized.hashtable.wbsize=10000000; + +set hive.mapjoin.hybridgrace.hashtable=false; + +-- No spill, base result +explain +select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +; + +select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +; + +set hive.mapjoin.hybridgrace.hashtable=true; + +-- No spill, 2 partitions created in memory +explain +select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +; + +select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +; + + +set hive.auto.convert.join.noconditionaltask.size=20000; +set hive.mapjoin.optimized.hashtable.wbsize=10000; + +set hive.mapjoin.hybridgrace.hashtable=false; + +-- Spill case base result +explain +select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +; + +select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +; + +set hive.mapjoin.hybridgrace.hashtable=true; + +-- Spill case, one partition in memory, one spilled on creation +explain +select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +; + +select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +; + +drop table parttbl; + + +-- Test vectorization +-- Test case borrowed from vector_decimal_mapjoin.q +CREATE TABLE decimal_mapjoin STORED AS ORC AS + SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1, + CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2, + cint + FROM alltypesorc; + +SET hive.auto.convert.join=true; +SET hive.auto.convert.join.noconditionaltask=true; +SET hive.auto.convert.join.noconditionaltask.size=50000000; +set hive.mapjoin.optimized.hashtable.wbsize=10000; +SET hive.vectorized.execution.enabled=true; +set hive.mapjoin.hybridgrace.hashtable=false; + +EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 + FROM decimal_mapjoin l + JOIN decimal_mapjoin r ON l.cint = r.cint + WHERE l.cint = 6981; +SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 + FROM decimal_mapjoin l + JOIN decimal_mapjoin r ON l.cint = r.cint + WHERE l.cint = 6981; + +set hive.mapjoin.hybridgrace.hashtable=true; + +EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 + FROM decimal_mapjoin l + JOIN decimal_mapjoin r ON l.cint = r.cint + WHERE l.cint = 6981; +SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 + FROM decimal_mapjoin l + JOIN decimal_mapjoin r ON l.cint = r.cint + WHERE l.cint = 6981; + +DROP TABLE decimal_mapjoin; diff --git ql/src/test/queries/clientpositive/hybridgrace_hashjoin_2.q ql/src/test/queries/clientpositive/hybridgrace_hashjoin_2.q new file mode 100644 index 0000000..2ccc4a9 --- /dev/null +++ 
ql/src/test/queries/clientpositive/hybridgrace_hashjoin_2.q @@ -0,0 +1,151 @@ +-- Hybrid Grace Hash Join +-- Test n-way join +SHOW DATABASES; + +set hive.auto.convert.join=true; +set hive.auto.convert.join.noconditionaltask=true; +set hive.cbo.enable=false; + + +-- 3-way mapjoin (1 big table, 2 small tables) +SHOW DATABASES; + +set hive.mapjoin.hybridgrace.hashtable=false; + +EXPLAIN +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN src y ON (y.key = x.key); + +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN src y ON (y.key = x.key); + +set hive.mapjoin.hybridgrace.hashtable=true; + +EXPLAIN +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN src y ON (y.key = x.key); + +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN src y ON (y.key = x.key); + + +-- 4-way mapjoin (1 big table, 3 small tables) +SHOW DATABASES; + +set hive.mapjoin.hybridgrace.hashtable=false; + +EXPLAIN +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN srcpart w ON (x.key = w.key) +JOIN src y ON (y.key = x.key); + +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN srcpart w ON (x.key = w.key) +JOIN src y ON (y.key = x.key); + +set hive.mapjoin.hybridgrace.hashtable=true; + +EXPLAIN +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN srcpart w ON (x.key = w.key) +JOIN src y ON (y.key = x.key); + +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN srcpart w ON (x.key = w.key) +JOIN src y ON (y.key = x.key); + + +-- 2 sets of 3-way mapjoin under 2 different tasks +SHOW DATABASES; + +set hive.mapjoin.hybridgrace.hashtable=false; + +EXPLAIN +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN src y ON (y.key = x.key) +UNION +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.value = z.value) +JOIN src y ON (y.value = x.value); + +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN src y ON (y.key = x.key) +UNION +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.value = z.value) +JOIN src y ON (y.value = x.value); + +set hive.mapjoin.hybridgrace.hashtable=true; + +EXPLAIN +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN src y ON (y.key = x.key) +UNION +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.value = z.value) +JOIN src y ON (y.value = x.value); + +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN src y ON (y.key = x.key) +UNION +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.value = z.value) +JOIN src y ON (y.value = x.value); + + +-- A chain of 2 sets of 3-way mapjoin under the same task +SHOW DATABASES; + +set hive.mapjoin.hybridgrace.hashtable=false; + +EXPLAIN +SELECT COUNT(*) +FROM src1 x +JOIN srcpart z1 ON (x.key = z1.key) +JOIN src y1 ON (x.key = y1.key) +JOIN srcpart z2 ON (x.value = z2.value) +JOIN src y2 ON (x.value = y2.value) +WHERE z1.key < 'zzzzzzzz' AND z2.key < 'zzzzzzzzzz' + AND y1.value < 'zzzzzzzz' AND y2.value < 'zzzzzzzzzz'; + +SELECT COUNT(*) +FROM src1 x +JOIN srcpart z1 ON (x.key = z1.key) +JOIN src y1 ON (x.key = y1.key) +JOIN srcpart z2 ON (x.value = z2.value) +JOIN src y2 ON (x.value = y2.value) +WHERE z1.key < 'zzzzzzzz' AND z2.key < 'zzzzzzzzzz' + AND y1.value < 'zzzzzzzz' AND y2.value < 'zzzzzzzzzz'; + +set hive.mapjoin.hybridgrace.hashtable=true; + +EXPLAIN +SELECT COUNT(*) +FROM src1 x +JOIN srcpart z1 ON (x.key = z1.key) +JOIN src y1 ON (x.key = y1.key) +JOIN srcpart z2 ON (x.value = z2.value) +JOIN src y2 ON (x.value = y2.value) +WHERE z1.key < 'zzzzzzzz' 
AND z2.key < 'zzzzzzzzzz' + AND y1.value < 'zzzzzzzz' AND y2.value < 'zzzzzzzzzz'; + +SELECT COUNT(*) +FROM src1 x +JOIN srcpart z1 ON (x.key = z1.key) +JOIN src y1 ON (x.key = y1.key) +JOIN srcpart z2 ON (x.value = z2.value) +JOIN src y2 ON (x.value = y2.value) +WHERE z1.key < 'zzzzzzzz' AND z2.key < 'zzzzzzzzzz' + AND y1.value < 'zzzzzzzz' AND y2.value < 'zzzzzzzzzz'; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/hybridhashjoin.q ql/src/test/queries/clientpositive/hybridhashjoin.q deleted file mode 100644 index fbd48ea..0000000 --- ql/src/test/queries/clientpositive/hybridhashjoin.q +++ /dev/null @@ -1,250 +0,0 @@ -set hive.auto.convert.join=true; -set hive.auto.convert.join.noconditionaltask.size=1300000; -set hive.mapjoin.optimized.hashtable.wbsize=880000; -set hive.mapjoin.hybridgrace.memcheckfrequency=1024; - -set hive.mapjoin.hybridgrace.hashtable=false; - --- Base result for inner join -explain -select count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint - where c.cint < 2000000000) t1 -; - -select count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint - where c.cint < 2000000000) t1 -; - -set hive.mapjoin.hybridgrace.hashtable=true; - --- Two partitions are created. One in memory, one on disk on creation. --- The one in memory will eventually exceed memory limit, but won't spill. -explain -select count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint - where c.cint < 2000000000) t1 -; - -select count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint - where c.cint < 2000000000) t1 -; - -set hive.auto.convert.join.noconditionaltask.size=3000000; -set hive.mapjoin.optimized.hashtable.wbsize=100000; - -set hive.mapjoin.hybridgrace.hashtable=false; - --- Base result for inner join -explain -select count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint) t1 -; - -select count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint) t1 -; - -set hive.mapjoin.hybridgrace.hashtable=true; - --- 16 partitions are created: 3 in memory, 13 on disk on creation. --- 1 partition is spilled during first round processing, which ends up having 2 in memory, 14 on disk -explain -select count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint) t1 -; - -select count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint) t1 -; - - - -set hive.mapjoin.hybridgrace.hashtable=false; - --- Base result for outer join -explain -select count(*) from -(select c.ctinyint - from alltypesorc c - left outer join alltypesorc cd - on cd.cint = c.cint) t1 -; - -select count(*) from -(select c.ctinyint - from alltypesorc c - left outer join alltypesorc cd - on cd.cint = c.cint) t1 -; - -set hive.mapjoin.hybridgrace.hashtable=true; - --- 32 partitions are created. 3 in memory, 29 on disk on creation. 
-explain -select count(*) from -(select c.ctinyint - from alltypesorc c - left outer join alltypesorc cd - on cd.cint = c.cint) t1 -; - -select count(*) from -(select c.ctinyint - from alltypesorc c - left outer join alltypesorc cd - on cd.cint = c.cint) t1 -; - - --- Partitioned table -create table parttbl (key string, value char(20)) partitioned by (dt char(10)); -insert overwrite table parttbl partition(dt='2000-01-01') - select * from src; -insert overwrite table parttbl partition(dt='2000-01-02') - select * from src1; - -set hive.auto.convert.join.noconditionaltask.size=30000000; -set hive.mapjoin.optimized.hashtable.wbsize=10000000; - -set hive.mapjoin.hybridgrace.hashtable=false; - --- No spill, base result -explain -select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -; - -select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -; - -set hive.mapjoin.hybridgrace.hashtable=true; - --- No spill, 2 partitions created in memory -explain -select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -; - -select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -; - - -set hive.auto.convert.join.noconditionaltask.size=20000; -set hive.mapjoin.optimized.hashtable.wbsize=10000; - -set hive.mapjoin.hybridgrace.hashtable=false; - --- Spill case base result -explain -select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -; - -select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -; - -set hive.mapjoin.hybridgrace.hashtable=true; - --- Spill case, one partition in memory, one spilled on creation -explain -select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -; - -select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -; - -drop table parttbl; - - --- Test vectorization --- Test case borrowed from vector_decimal_mapjoin.q -CREATE TABLE decimal_mapjoin STORED AS ORC AS - SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1, - CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2, - cint - FROM alltypesorc; - -SET hive.auto.convert.join=true; -SET hive.auto.convert.join.noconditionaltask=true; -SET hive.auto.convert.join.noconditionaltask.size=50000000; -set hive.mapjoin.optimized.hashtable.wbsize=10000; -SET hive.vectorized.execution.enabled=true; -set hive.mapjoin.hybridgrace.hashtable=false; - -EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 - FROM decimal_mapjoin l - JOIN decimal_mapjoin r ON l.cint = r.cint - WHERE l.cint = 6981; -SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 - FROM decimal_mapjoin l - JOIN decimal_mapjoin r ON l.cint = r.cint - WHERE l.cint = 6981; - -set hive.mapjoin.hybridgrace.hashtable=true; - -EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 - FROM decimal_mapjoin l - JOIN decimal_mapjoin r ON l.cint = r.cint - WHERE l.cint = 6981; -SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 - FROM decimal_mapjoin l - JOIN decimal_mapjoin r ON l.cint = r.cint - WHERE l.cint = 6981; - -DROP TABLE decimal_mapjoin; diff --git ql/src/test/queries/clientpositive/tez_join_hash.q ql/src/test/queries/clientpositive/tez_join_hash.q index 3571cd5..67d89f8 100644 --- ql/src/test/queries/clientpositive/tez_join_hash.q +++ 
ql/src/test/queries/clientpositive/tez_join_hash.q @@ -14,6 +14,8 @@ SELECT count(*) FROM src, orc_src where src.key=orc_src.key; set hive.auto.convert.join=true; set hive.auto.convert.join.noconditionaltask=true; set hive.auto.convert.join.noconditionaltask.size=3000; +set hive.mapjoin.hybridgrace.minwbsize=350; +set hive.mapjoin.hybridgrace.minnumpartitions=8; explain select count(*) from (select x.key as key, y.value as value from diff --git ql/src/test/queries/clientpositive/tez_smb_main.q ql/src/test/queries/clientpositive/tez_smb_main.q index 6398762..1802709 100644 --- ql/src/test/queries/clientpositive/tez_smb_main.q +++ ql/src/test/queries/clientpositive/tez_smb_main.q @@ -42,6 +42,8 @@ select count(*) from tab a join tab_part b on a.key = b.key; set hive.auto.convert.join.noconditionaltask.size=2000; +set hive.mapjoin.hybridgrace.minwbsize=500; +set hive.mapjoin.hybridgrace.minnumpartitions=4; explain select count (*) from tab a join tab_part b on a.key = b.key; @@ -50,6 +52,8 @@ select count(*) from tab a join tab_part b on a.key = b.key; set hive.auto.convert.join.noconditionaltask.size=1000; +set hive.mapjoin.hybridgrace.minwbsize=250; +set hive.mapjoin.hybridgrace.minnumpartitions=4; explain select count (*) from tab a join tab_part b on a.key = b.key; @@ -58,6 +62,8 @@ select count(*) from tab a join tab_part b on a.key = b.key; set hive.auto.convert.join.noconditionaltask.size=500; +set hive.mapjoin.hybridgrace.minwbsize=125; +set hive.mapjoin.hybridgrace.minnumpartitions=4; explain select count(*) from tab a join tab_part b on a.key = b.key join src1 c on a.value = c.value; select count(*) from tab a join tab_part b on a.key = b.key join src1 c on a.value = c.value; diff --git ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out new file mode 100644 index 0000000..33a85bf --- /dev/null +++ ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out @@ -0,0 +1,1583 @@ +PREHOOK: query: -- Hybrid Grace Hash Join +-- Test basic functionalities: +-- 1. Various cases when hash partitions spill +-- 2. Partitioned table spilling +-- 3. Vectorization + +SHOW DATABASES +PREHOOK: type: SHOWDATABASES +POSTHOOK: query: -- Hybrid Grace Hash Join +-- Test basic functionalities: +-- 1. Various cases when hash partitions spill +-- 2. Partitioned table spilling +-- 3. 
Vectorization + +SHOW DATABASES +POSTHOOK: type: SHOWDATABASES +default +PREHOOK: query: -- Base result for inner join +explain +select count(*) from +(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint + where c.cint < 2000000000) t1 +PREHOOK: type: QUERY +POSTHOOK: query: -- Base result for inner join +explain +select count(*) from +(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint + where c.cint < 2000000000) t1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (cint < 2000000000) (type: boolean) + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + input vertices: + 1 Map 3 + Statistics: Num rows: 4505 Data size: 968719 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (cint < 2000000000) (type: boolean) + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from +(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint + where c.cint < 2000000000) t1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from +(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint + where c.cint < 2000000000) t1 +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@alltypesorc +#### A masked pattern was here #### +3152013 +PREHOOK: query: -- Two partitions are created. One in memory, one on disk on creation. +-- The one in memory will eventually exceed memory limit, but won't spill. +explain +select count(*) from +(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint + where c.cint < 2000000000) t1 +PREHOOK: type: QUERY +POSTHOOK: query: -- Two partitions are created. One in memory, one on disk on creation. +-- The one in memory will eventually exceed memory limit, but won't spill. +explain +select count(*) from +(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint + where c.cint < 2000000000) t1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (cint < 2000000000) (type: boolean) + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + input vertices: + 1 Map 3 + Statistics: Num rows: 4505 Data size: 968719 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (cint < 2000000000) (type: boolean) + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from +(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint + where c.cint < 2000000000) t1 +PREHOOK: type: QUERY +PREHOOK: Input: 
default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from +(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint + where c.cint < 2000000000) t1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +3152013 +PREHOOK: query: -- Base result for inner join +explain +select count(*) from +(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint) t1 +PREHOOK: type: QUERY +POSTHOOK: query: -- Base result for inner join +explain +select count(*) from +(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint) t1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: cint is not null (type: boolean) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + input vertices: + 1 Map 3 + Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: cint is not null (type: boolean) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from +(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint) t1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from 
+(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint) t1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +3152013 +PREHOOK: query: -- 16 partitions are created: 3 in memory, 13 on disk on creation. +-- 1 partition is spilled during first round processing, which ends up having 2 in memory, 14 on disk +explain +select count(*) from +(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint) t1 +PREHOOK: type: QUERY +POSTHOOK: query: -- 16 partitions are created: 3 in memory, 13 on disk on creation. +-- 1 partition is spilled during first round processing, which ends up having 2 in memory, 14 on disk +explain +select count(*) from +(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint) t1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: cint is not null (type: boolean) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + input vertices: + 1 Map 3 + Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: cint is not null (type: boolean) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from +(select c.ctinyint + from alltypesorc c + inner 
join alltypesorc cd + on cd.cint = c.cint) t1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from +(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint) t1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +3152013 +PREHOOK: query: -- Base result for outer join +explain +select count(*) from +(select c.ctinyint + from alltypesorc c + left outer join alltypesorc cd + on cd.cint = c.cint) t1 +PREHOOK: type: QUERY +POSTHOOK: query: -- Base result for outer join +explain +select count(*) from +(select c.ctinyint + from alltypesorc c + left outer join alltypesorc cd + on cd.cint = c.cint) t1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + input vertices: + 1 Map 3 + Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from +(select c.ctinyint + from alltypesorc c + left outer join alltypesorc cd + on cd.cint = c.cint) t1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from +(select c.ctinyint + from alltypesorc c + left outer join alltypesorc cd + on cd.cint = c.cint) t1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +3155128 +PREHOOK: query: -- 32 
partitions are created. 3 in memory, 29 on disk on creation. +explain +select count(*) from +(select c.ctinyint + from alltypesorc c + left outer join alltypesorc cd + on cd.cint = c.cint) t1 +PREHOOK: type: QUERY +POSTHOOK: query: -- 32 partitions are created. 3 in memory, 29 on disk on creation. +explain +select count(*) from +(select c.ctinyint + from alltypesorc c + left outer join alltypesorc cd + on cd.cint = c.cint) t1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + input vertices: + 1 Map 3 + Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from +(select c.ctinyint + from alltypesorc c + left outer join alltypesorc cd + on cd.cint = c.cint) t1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from +(select c.ctinyint + from alltypesorc c + left outer join alltypesorc cd + on cd.cint = c.cint) t1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +3155128 +PREHOOK: query: -- Partitioned table +create table parttbl (key string, value char(20)) partitioned by (dt char(10)) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parttbl +POSTHOOK: query: -- Partitioned table +create table parttbl (key string, value char(20)) partitioned by (dt 
char(10)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parttbl +PREHOOK: query: insert overwrite table parttbl partition(dt='2000-01-01') + select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@parttbl@dt=2000-01-01 +POSTHOOK: query: insert overwrite table parttbl partition(dt='2000-01-01') + select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@parttbl@dt=2000-01-01 +POSTHOOK: Lineage: parttbl PARTITION(dt=2000-01-01).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: parttbl PARTITION(dt=2000-01-01).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table parttbl partition(dt='2000-01-02') + select * from src1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +PREHOOK: Output: default@parttbl@dt=2000-01-02 +POSTHOOK: query: insert overwrite table parttbl partition(dt='2000-01-02') + select * from src1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@parttbl@dt=2000-01-02 +POSTHOOK: Lineage: parttbl PARTITION(dt=2000-01-02).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: parttbl PARTITION(dt=2000-01-02).value EXPRESSION [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- No spill, base result +explain +select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +PREHOOK: type: QUERY +POSTHOOK: query: -- No spill, base result +explain +select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: string) + 1 key (type: string) + input vertices: + 1 Map 3 + Statistics: Num rows: 289 Data size: 6872 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Map 3 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data 
size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +PREHOOK: type: QUERY +PREHOOK: Input: default@parttbl +PREHOOK: Input: default@parttbl@dt=2000-01-01 +PREHOOK: Input: default@parttbl@dt=2000-01-02 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parttbl +POSTHOOK: Input: default@parttbl@dt=2000-01-01 +POSTHOOK: Input: default@parttbl@dt=2000-01-02 +#### A masked pattern was here #### +1217 +PREHOOK: query: -- No spill, 2 partitions created in memory +explain +select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +PREHOOK: type: QUERY +POSTHOOK: query: -- No spill, 2 partitions created in memory +explain +select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: string) + 1 key (type: string) + input vertices: + 1 Map 3 + Statistics: Num rows: 289 Data size: 6872 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Map 3 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +PREHOOK: type: QUERY +PREHOOK: Input: default@parttbl +PREHOOK: Input: default@parttbl@dt=2000-01-01 +PREHOOK: Input: default@parttbl@dt=2000-01-02 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parttbl +POSTHOOK: Input: default@parttbl@dt=2000-01-01 +POSTHOOK: Input: default@parttbl@dt=2000-01-02 +#### A masked pattern was here #### +1217 +PREHOOK: query: -- Spill case base result +explain +select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +PREHOOK: type: QUERY +POSTHOOK: query: -- Spill case base result +explain +select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: string) + 1 key (type: string) + input vertices: + 1 Map 3 + Statistics: Num rows: 289 Data size: 6872 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Map 3 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +PREHOOK: type: QUERY 
+PREHOOK: Input: default@parttbl +PREHOOK: Input: default@parttbl@dt=2000-01-01 +PREHOOK: Input: default@parttbl@dt=2000-01-02 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parttbl +POSTHOOK: Input: default@parttbl@dt=2000-01-01 +POSTHOOK: Input: default@parttbl@dt=2000-01-02 +#### A masked pattern was here #### +1217 +PREHOOK: query: -- Spill case, one partition in memory, one spilled on creation +explain +select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +PREHOOK: type: QUERY +POSTHOOK: query: -- Spill case, one partition in memory, one spilled on creation +explain +select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: string) + 1 key (type: string) + input vertices: + 1 Map 3 + Statistics: Num rows: 289 Data size: 6872 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Map 3 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +PREHOOK: type: QUERY +PREHOOK: Input: default@parttbl +PREHOOK: Input: default@parttbl@dt=2000-01-01 +PREHOOK: Input: default@parttbl@dt=2000-01-02 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from +(select p1.value + from parttbl p1 + inner 
join parttbl p2 + on p1.key = p2.key) t1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parttbl +POSTHOOK: Input: default@parttbl@dt=2000-01-01 +POSTHOOK: Input: default@parttbl@dt=2000-01-02 +#### A masked pattern was here #### +1217 +PREHOOK: query: drop table parttbl +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parttbl +PREHOOK: Output: default@parttbl +POSTHOOK: query: drop table parttbl +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parttbl +POSTHOOK: Output: default@parttbl +PREHOOK: query: -- Test vectorization +-- Test case borrowed from vector_decimal_mapjoin.q +CREATE TABLE decimal_mapjoin STORED AS ORC AS + SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1, + CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2, + cint + FROM alltypesorc +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: database:default +PREHOOK: Output: default@decimal_mapjoin +POSTHOOK: query: -- Test vectorization +-- Test case borrowed from vector_decimal_mapjoin.q +CREATE TABLE decimal_mapjoin STORED AS ORC AS + SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1, + CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2, + cint + FROM alltypesorc +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: database:default +POSTHOOK: Output: default@decimal_mapjoin +PREHOOK: query: EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 + FROM decimal_mapjoin l + JOIN decimal_mapjoin r ON l.cint = r.cint + WHERE l.cint = 6981 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 + FROM decimal_mapjoin l + JOIN decimal_mapjoin r ON l.cint = r.cint + WHERE l.cint = 6981 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: l + Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (cint = 6981) (type: boolean) + Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 6981 (type: int) + 1 6981 (type: int) + outputColumnNames: _col1, _col9 + input vertices: + 1 Map 2 + Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 6981 (type: int), 6981 (type: int), _col1 (type: decimal(20,10)), _col9 (type: decimal(23,14)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map 2 + Map Operator Tree: + TableScan + alias: r + Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (cint = 6981) (type: boolean) + Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: 6981 (type: int) + sort order: + + 
Map-reduce partition columns: 6981 (type: int) + Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE + value expressions: cdecimal2 (type: decimal(23,14)) + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 + FROM decimal_mapjoin l + JOIN decimal_mapjoin r ON l.cint = r.cint + WHERE l.cint = 6981 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_mapjoin +#### A masked pattern was here #### +POSTHOOK: query: SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 + FROM decimal_mapjoin l + JOIN decimal_mapjoin r ON l.cint = r.cint + WHERE l.cint = 6981 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_mapjoin +#### A masked pattern was here #### +6981 6981 NULL NULL +6981 6981 NULL -617.5607769230769 +6981 6981 NULL -617.5607769230769 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL 6984454.211097692 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL -617.5607769230769 +6981 6981 NULL -617.5607769230769 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL 6984454.211097692 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL -617.5607769230769 +6981 6981 NULL -617.5607769230769 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL 6984454.211097692 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL -617.5607769230769 +6981 6981 NULL -617.5607769230769 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL 6984454.211097692 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 5831542.269248378 NULL +6981 6981 5831542.269248378 -617.5607769230769 +6981 6981 5831542.269248378 -617.5607769230769 +6981 6981 5831542.269248378 NULL +6981 6981 5831542.269248378 NULL +6981 6981 5831542.269248378 NULL +6981 6981 5831542.269248378 6984454.211097692 +6981 6981 5831542.269248378 NULL +6981 6981 5831542.269248378 NULL +6981 6981 5831542.269248378 NULL +6981 6981 NULL NULL +6981 6981 NULL -617.5607769230769 +6981 6981 NULL -617.5607769230769 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL 6984454.211097692 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL -617.5607769230769 +6981 6981 NULL -617.5607769230769 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL 6984454.211097692 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL -617.5607769230769 +6981 6981 NULL -617.5607769230769 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL 6984454.211097692 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 -515.621072973 NULL +6981 6981 -515.621072973 -617.5607769230769 +6981 6981 -515.621072973 -617.5607769230769 +6981 6981 -515.621072973 NULL +6981 6981 -515.621072973 NULL +6981 6981 -515.621072973 NULL +6981 6981 -515.621072973 6984454.211097692 +6981 6981 -515.621072973 NULL +6981 6981 -515.621072973 NULL +6981 6981 -515.621072973 NULL +6981 6981 -515.621072973 NULL +6981 6981 -515.621072973 -617.5607769230769 +6981 6981 -515.621072973 -617.5607769230769 +6981 6981 -515.621072973 NULL +6981 6981 -515.621072973 NULL +6981 6981 -515.621072973 NULL +6981 6981 -515.621072973 
6984454.211097692 +6981 6981 -515.621072973 NULL +6981 6981 -515.621072973 NULL +6981 6981 -515.621072973 NULL +PREHOOK: query: EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 + FROM decimal_mapjoin l + JOIN decimal_mapjoin r ON l.cint = r.cint + WHERE l.cint = 6981 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 + FROM decimal_mapjoin l + JOIN decimal_mapjoin r ON l.cint = r.cint + WHERE l.cint = 6981 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: l + Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (cint = 6981) (type: boolean) + Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 6981 (type: int) + 1 6981 (type: int) + outputColumnNames: _col1, _col9 + input vertices: + 1 Map 2 + Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: 6981 (type: int), 6981 (type: int), _col1 (type: decimal(20,10)), _col9 (type: decimal(23,14)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map 2 + Map Operator Tree: + TableScan + alias: r + Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (cint = 6981) (type: boolean) + Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: 6981 (type: int) + sort order: + + Map-reduce partition columns: 6981 (type: int) + Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE + value expressions: cdecimal2 (type: decimal(23,14)) + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 + FROM decimal_mapjoin l + JOIN decimal_mapjoin r ON l.cint = r.cint + WHERE l.cint = 6981 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_mapjoin +#### A masked pattern was here #### +POSTHOOK: query: SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 + FROM decimal_mapjoin l + JOIN decimal_mapjoin r ON l.cint = r.cint + WHERE l.cint = 6981 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_mapjoin +#### A masked pattern was here #### +6981 6981 NULL NULL +6981 6981 NULL -617.5607769230769 +6981 6981 NULL -617.5607769230769 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL 6984454.211097692 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL -617.5607769230769 +6981 6981 NULL -617.5607769230769 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL 6984454.211097692 +6981 6981 NULL NULL 
+6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL -617.5607769230769 +6981 6981 NULL -617.5607769230769 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL 6984454.211097692 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL -617.5607769230769 +6981 6981 NULL -617.5607769230769 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL 6984454.211097692 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 5831542.269248378 NULL +6981 6981 5831542.269248378 -617.5607769230769 +6981 6981 5831542.269248378 -617.5607769230769 +6981 6981 5831542.269248378 NULL +6981 6981 5831542.269248378 NULL +6981 6981 5831542.269248378 NULL +6981 6981 5831542.269248378 6984454.211097692 +6981 6981 5831542.269248378 NULL +6981 6981 5831542.269248378 NULL +6981 6981 5831542.269248378 NULL +6981 6981 NULL NULL +6981 6981 NULL -617.5607769230769 +6981 6981 NULL -617.5607769230769 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL 6984454.211097692 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL -617.5607769230769 +6981 6981 NULL -617.5607769230769 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL 6984454.211097692 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL -617.5607769230769 +6981 6981 NULL -617.5607769230769 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL 6984454.211097692 +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 NULL NULL +6981 6981 -515.621072973 NULL +6981 6981 -515.621072973 -617.5607769230769 +6981 6981 -515.621072973 -617.5607769230769 +6981 6981 -515.621072973 NULL +6981 6981 -515.621072973 NULL +6981 6981 -515.621072973 NULL +6981 6981 -515.621072973 6984454.211097692 +6981 6981 -515.621072973 NULL +6981 6981 -515.621072973 NULL +6981 6981 -515.621072973 NULL +6981 6981 -515.621072973 NULL +6981 6981 -515.621072973 -617.5607769230769 +6981 6981 -515.621072973 -617.5607769230769 +6981 6981 -515.621072973 NULL +6981 6981 -515.621072973 NULL +6981 6981 -515.621072973 NULL +6981 6981 -515.621072973 6984454.211097692 +6981 6981 -515.621072973 NULL +6981 6981 -515.621072973 NULL +6981 6981 -515.621072973 NULL +PREHOOK: query: DROP TABLE decimal_mapjoin +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@decimal_mapjoin +PREHOOK: Output: default@decimal_mapjoin +POSTHOOK: query: DROP TABLE decimal_mapjoin +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@decimal_mapjoin +POSTHOOK: Output: default@decimal_mapjoin diff --git ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_2.q.out ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_2.q.out new file mode 100644 index 0000000..b2e5026 --- /dev/null +++ ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_2.q.out @@ -0,0 +1,1397 @@ +PREHOOK: query: -- Hybrid Grace Hash Join +-- Test n-way join +SHOW DATABASES +PREHOOK: type: SHOWDATABASES +POSTHOOK: query: -- Hybrid Grace Hash Join +-- Test n-way join +SHOW DATABASES +POSTHOOK: type: SHOWDATABASES +default +PREHOOK: query: -- 3-way mapjoin (1 big table, 2 small tables) +SHOW DATABASES +PREHOOK: type: SHOWDATABASES +POSTHOOK: query: -- 3-way mapjoin (1 big table, 2 small tables) +SHOW DATABASES +POSTHOOK: type: SHOWDATABASES +default +PREHOOK: query: EXPLAIN +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN src y ON 
(y.key = x.key) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN src y ON (y.key = x.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Map 2 + Map Operator Tree: + TableScan + alias: z + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + input vertices: + 0 Map 1 + 2 Map 4 + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Map 4 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN src y ON (y.key = x.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN src y ON (y.key = x.key) 
+POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +428 +PREHOOK: query: EXPLAIN +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN src y ON (y.key = x.key) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN src y ON (y.key = x.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Map 2 + Map Operator Tree: + TableScan + alias: z + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + input vertices: + 0 Map 1 + 2 Map 4 + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Map 4 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) 
+JOIN src y ON (y.key = x.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN src y ON (y.key = x.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +428 +PREHOOK: query: -- 4-way mapjoin (1 big table, 3 small tables) +SHOW DATABASES +PREHOOK: type: SHOWDATABASES +POSTHOOK: query: -- 4-way mapjoin (1 big table, 3 small tables) +SHOW DATABASES +POSTHOOK: type: SHOWDATABASES +default +PREHOOK: query: EXPLAIN +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN srcpart w ON (x.key = w.key) +JOIN src y ON (y.key = x.key) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN srcpart w ON (x.key = w.key) +JOIN src y ON (y.key = x.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Map 2 + Map Operator Tree: + TableScan + alias: z + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + Inner Join 0 to 3 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + 3 key (type: string) + input vertices: + 0 Map 1 + 2 Map 4 + 3 Map 5 + Statistics: Num rows: 3300 Data size: 35059 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Map 4 + Map Operator Tree: + TableScan + alias: w + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: 
string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN srcpart w ON (x.key = w.key) +JOIN src y ON (y.key = x.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN srcpart w ON (x.key = w.key) +JOIN src y ON (y.key = x.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +5680 +PREHOOK: query: EXPLAIN +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN srcpart w ON (x.key = w.key) +JOIN src y ON (y.key = x.key) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN srcpart w ON (x.key = w.key) +JOIN src y ON (y.key = x.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Map 2 + Map Operator Tree: + TableScan + alias: z + Statistics: Num rows: 2000 Data 
size: 21248 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + Inner Join 0 to 3 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + 3 key (type: string) + input vertices: + 0 Map 1 + 2 Map 4 + 3 Map 5 + Statistics: Num rows: 3300 Data size: 35059 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Map 4 + Map Operator Tree: + TableScan + alias: w + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN srcpart w ON (x.key = w.key) +JOIN src y ON (y.key = x.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN srcpart w ON (x.key = w.key) +JOIN src y ON (y.key = x.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +5680 
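Editor's note on the paired plans above: each scenario is run twice, and the second EXPLAIN differs from the first only by the "HybridGraceHashJoin: true" annotation on the Map Join Operator, marking the run where the hash table was built in spill-capable hybrid grace mode. The .q.out golden file records plans and results but not the SET statements that flip between modes. The sketch below shows the kind of toggles the driving .q script would use; the property names are real Hive settings (the two minwbsize/minnumpartitions knobs are the ones this patch adds to HiveConf), but the numeric values are illustrative assumptions, not copied from the test.

    -- Illustrative only: enable hybrid grace hash join so the map join may spill
    SET hive.mapjoin.hybridgrace.hashtable=true;
    -- Assumed small sizes to force several in-memory partitions and easy spilling
    SET hive.mapjoin.hybridgrace.minwbsize=125;        -- minimum write buffer size
    SET hive.mapjoin.hybridgrace.minnumpartitions=4;   -- minimum partition count
    -- Assumed tight memory budget so the small table cannot be held fully in memory
    SET hive.auto.convert.join.noconditionaltask.size=40000;

With hive.mapjoin.hybridgrace.hashtable=false the same queries yield the base plans without the annotation, which is why each count above (428 for the 3-way join, 5680 for the 4-way join) appears twice with identical values: enabling spilling must not change the join result.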
+PREHOOK: query: -- 2 sets of 3-way mapjoin under 2 different tasks +SHOW DATABASES +PREHOOK: type: SHOWDATABASES +POSTHOOK: query: -- 2 sets of 3-way mapjoin under 2 different tasks +SHOW DATABASES +POSTHOOK: type: SHOWDATABASES +default +PREHOOK: query: EXPLAIN +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN src y ON (y.key = x.key) +UNION +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.value = z.value) +JOIN src y ON (y.value = x.value) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN src y ON (y.key = x.key) +UNION +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.value = z.value) +JOIN src y ON (y.value = x.value) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE) + Map 8 <- Map 10 (BROADCAST_EDGE), Map 7 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE), Union 4 (CONTAINS) + Reducer 5 <- Union 4 (SIMPLE_EDGE) + Reducer 9 <- Map 8 (SIMPLE_EDGE), Union 4 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Map 10 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map 2 + Map Operator Tree: + TableScan + alias: z + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + input vertices: + 0 Map 1 + 2 Map 6 + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Map 6 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE + Map 7 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Map 8 + Map Operator Tree: + TableScan + alias: z + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 value (type: string) + 1 value (type: string) + 2 value (type: string) + input vertices: + 0 Map 7 + 2 Map 10 + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Group By Operator + keys: _col0 (type: bigint) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reducer 5 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 9 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Group By Operator + keys: _col0 (type: bigint) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Union 4 + Vertex: Union 4 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN src y ON (y.key = x.key) +UNION +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.value = z.value) +JOIN src y ON (y.value = x.value) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN src y ON (y.key = x.key) +UNION +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON 
(x.value = z.value) +JOIN src y ON (y.value = x.value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +428 +452 +PREHOOK: query: EXPLAIN +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN src y ON (y.key = x.key) +UNION +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.value = z.value) +JOIN src y ON (y.value = x.value) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN src y ON (y.key = x.key) +UNION +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.value = z.value) +JOIN src y ON (y.value = x.value) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE) + Map 8 <- Map 10 (BROADCAST_EDGE), Map 7 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE), Union 4 (CONTAINS) + Reducer 5 <- Union 4 (SIMPLE_EDGE) + Reducer 9 <- Map 8 (SIMPLE_EDGE), Union 4 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Map 10 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map 2 + Map Operator Tree: + TableScan + alias: z + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + input vertices: + 0 Map 1 + 2 Map 6 + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Map 6 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 
Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map 7 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Map 8 + Map Operator Tree: + TableScan + alias: z + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 value (type: string) + 1 value (type: string) + 2 value (type: string) + input vertices: + 0 Map 7 + 2 Map 10 + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Group By Operator + keys: _col0 (type: bigint) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reducer 5 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 9 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Group By Operator + keys: _col0 (type: bigint) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Union 4 + Vertex: Union 4 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN src y ON (y.key = x.key) +UNION +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.value = z.value) +JOIN src y ON (y.value = x.value) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN src y ON (y.key = x.key) +UNION +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.value = z.value) +JOIN src y ON (y.value = x.value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +428 +452 +PREHOOK: query: -- A chain of 2 sets of 3-way mapjoin under the same task +SHOW DATABASES +PREHOOK: type: SHOWDATABASES +POSTHOOK: query: -- A chain of 2 sets of 3-way mapjoin under the same task +SHOW DATABASES +POSTHOOK: type: SHOWDATABASES +default +PREHOOK: query: EXPLAIN +SELECT COUNT(*) +FROM src1 x +JOIN srcpart z1 ON (x.key = z1.key) +JOIN src y1 ON (x.key = y1.key) +JOIN srcpart z2 ON (x.value = z2.value) +JOIN src y2 ON (x.value = y2.value) +WHERE z1.key < 'zzzzzzzz' AND z2.key < 'zzzzzzzzzz' + AND y1.value < 'zzzzzzzz' AND y2.value < 'zzzzzzzzzz' +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT COUNT(*) +FROM src1 x +JOIN srcpart z1 ON (x.key = z1.key) +JOIN src y1 ON (x.key = y1.key) +JOIN srcpart z2 ON (x.value = z2.value) +JOIN src y2 ON (x.value = y2.value) +WHERE z1.key < 'zzzzzzzz' AND z2.key < 'zzzzzzzzzz' + AND y1.value < 'zzzzzzzz' AND y2.value < 'zzzzzzzzzz' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((key is not null and value is not null) and (value < 'zzzzzzzzzz')) and (key < 'zzzzzzzz')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 2 + Map Operator Tree: + TableScan + alias: z1 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 'zzzzzzzz') (type: boolean) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + outputColumnNames: _col1 + input vertices: + 0 Map 1 + 2 Map 4 + Statistics: Num rows: 1465 Data size: 15565 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col1 (type: string) + 1 value (type: string) + 2 value (type: string) + input vertices: + 1 Map 5 + 2 Map 6 + Statistics: Num rows: 3223 Data size: 34243 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic 
stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Map 4 + Map Operator Tree: + TableScan + alias: y1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key is not null and (value < 'zzzzzzzz')) and (key < 'zzzzzzzz')) (type: boolean) + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: z2 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((value is not null and (key < 'zzzzzzzzzz')) and (value < 'zzzzzzzzzz')) (type: boolean) + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Map 6 + Map Operator Tree: + TableScan + alias: y2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (value < 'zzzzzzzzzz') (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT COUNT(*) +FROM src1 x +JOIN srcpart z1 ON (x.key = z1.key) +JOIN src y1 ON (x.key = y1.key) +JOIN srcpart z2 ON (x.value = z2.value) +JOIN src y2 ON (x.value = y2.value) +WHERE z1.key < 'zzzzzzzz' AND z2.key < 'zzzzzzzzzz' + AND y1.value < 'zzzzzzzz' AND y2.value < 'zzzzzzzzzz' +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: SELECT COUNT(*) +FROM src1 x +JOIN srcpart z1 ON (x.key = z1.key) +JOIN src y1 ON (x.key = y1.key) +JOIN srcpart z2 ON (x.value = z2.value) +JOIN src y2 ON (x.value = y2.value) +WHERE z1.key < 'zzzzzzzz' AND z2.key < 'zzzzzzzzzz' + AND y1.value < 'zzzzzzzz' AND y2.value < 'zzzzzzzzzz' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: 
default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +18256 +PREHOOK: query: EXPLAIN +SELECT COUNT(*) +FROM src1 x +JOIN srcpart z1 ON (x.key = z1.key) +JOIN src y1 ON (x.key = y1.key) +JOIN srcpart z2 ON (x.value = z2.value) +JOIN src y2 ON (x.value = y2.value) +WHERE z1.key < 'zzzzzzzz' AND z2.key < 'zzzzzzzzzz' + AND y1.value < 'zzzzzzzz' AND y2.value < 'zzzzzzzzzz' +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT COUNT(*) +FROM src1 x +JOIN srcpart z1 ON (x.key = z1.key) +JOIN src y1 ON (x.key = y1.key) +JOIN srcpart z2 ON (x.value = z2.value) +JOIN src y2 ON (x.value = y2.value) +WHERE z1.key < 'zzzzzzzz' AND z2.key < 'zzzzzzzzzz' + AND y1.value < 'zzzzzzzz' AND y2.value < 'zzzzzzzzzz' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((key is not null and value is not null) and (value < 'zzzzzzzzzz')) and (key < 'zzzzzzzz')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 2 + Map Operator Tree: + TableScan + alias: z1 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 'zzzzzzzz') (type: boolean) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + outputColumnNames: _col1 + input vertices: + 0 Map 1 + 2 Map 4 + Statistics: Num rows: 1465 Data size: 15565 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col1 (type: string) + 1 value (type: string) + 2 value (type: string) + input vertices: + 1 Map 5 + 2 Map 6 + Statistics: Num rows: 3223 Data size: 34243 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Map 4 + Map Operator Tree: + TableScan + alias: y1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key is not null and (value < 'zzzzzzzz')) and (key < 'zzzzzzzz')) (type: boolean) + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: 
string) + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: z2 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((value is not null and (key < 'zzzzzzzzzz')) and (value < 'zzzzzzzzzz')) (type: boolean) + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Map 6 + Map Operator Tree: + TableScan + alias: y2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (value < 'zzzzzzzzzz') (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT COUNT(*) +FROM src1 x +JOIN srcpart z1 ON (x.key = z1.key) +JOIN src y1 ON (x.key = y1.key) +JOIN srcpart z2 ON (x.value = z2.value) +JOIN src y2 ON (x.value = y2.value) +WHERE z1.key < 'zzzzzzzz' AND z2.key < 'zzzzzzzzzz' + AND y1.value < 'zzzzzzzz' AND y2.value < 'zzzzzzzzzz' +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: SELECT COUNT(*) +FROM src1 x +JOIN srcpart z1 ON (x.key = z1.key) +JOIN src y1 ON (x.key = y1.key) +JOIN srcpart z2 ON (x.value = z2.value) +JOIN src y2 ON (x.value = y2.value) +WHERE z1.key < 'zzzzzzzz' AND z2.key < 'zzzzzzzzzz' + AND y1.value < 'zzzzzzzz' AND y2.value < 'zzzzzzzzzz' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +18256 diff --git ql/src/test/results/clientpositive/tez/hybridhashjoin.q.out ql/src/test/results/clientpositive/tez/hybridhashjoin.q.out deleted file mode 100644 index 5d5b850..0000000 --- ql/src/test/results/clientpositive/tez/hybridhashjoin.q.out +++ /dev/null @@ -1,1566 +0,0 @@ -PREHOOK: query: -- Base result for inner join -explain -select count(*) from -(select c.ctinyint - from alltypesorc c - inner join 
alltypesorc cd - on cd.cint = c.cint - where c.cint < 2000000000) t1 -PREHOOK: type: QUERY -POSTHOOK: query: -- Base result for inner join -explain -select count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint - where c.cint < 2000000000) t1 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (cint < 2000000000) (type: boolean) - Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 3 - Statistics: Num rows: 4505 Data size: 968719 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Map 3 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (cint < 2000000000) (type: boolean) - Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint - where c.cint < 2000000000) t1 -PREHOOK: type: QUERY -PREHOOK: Input: default@alltypesorc -#### A masked pattern was here #### -POSTHOOK: query: select count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint - where c.cint < 2000000000) t1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@alltypesorc -#### A masked pattern was here #### -3152013 -PREHOOK: query: -- Two partitions are created. One in memory, one on disk on creation. 
--- The one in memory will eventually exceed memory limit, but won't spill. -explain -select count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint - where c.cint < 2000000000) t1 -PREHOOK: type: QUERY -POSTHOOK: query: -- Two partitions are created. One in memory, one on disk on creation. --- The one in memory will eventually exceed memory limit, but won't spill. -explain -select count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint - where c.cint < 2000000000) t1 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (cint < 2000000000) (type: boolean) - Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 3 - Statistics: Num rows: 4505 Data size: 968719 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Map 3 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (cint < 2000000000) (type: boolean) - Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint - where c.cint < 2000000000) t1 -PREHOOK: type: QUERY -PREHOOK: Input: default@alltypesorc -#### A masked pattern was here #### -POSTHOOK: query: select count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on 
cd.cint = c.cint - where c.cint < 2000000000) t1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@alltypesorc -#### A masked pattern was here #### -3152013 -PREHOOK: query: -- Base result for inner join -explain -select count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint) t1 -PREHOOK: type: QUERY -POSTHOOK: query: -- Base result for inner join -explain -select count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint) t1 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: cint is not null (type: boolean) - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 3 - Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Map 3 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: cint is not null (type: boolean) - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint) t1 -PREHOOK: type: QUERY -PREHOOK: Input: default@alltypesorc -#### A masked pattern was here #### -POSTHOOK: query: select count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint) t1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@alltypesorc -#### A masked 
pattern was here #### -3152013 -PREHOOK: query: -- 16 partitions are created: 3 in memory, 13 on disk on creation. --- 1 partition is spilled during first round processing, which ends up having 2 in memory, 14 on disk -explain -select count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint) t1 -PREHOOK: type: QUERY -POSTHOOK: query: -- 16 partitions are created: 3 in memory, 13 on disk on creation. --- 1 partition is spilled during first round processing, which ends up having 2 in memory, 14 on disk -explain -select count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint) t1 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: cint is not null (type: boolean) - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 3 - Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Map 3 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: cint is not null (type: boolean) - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint) t1 -PREHOOK: type: QUERY -PREHOOK: Input: default@alltypesorc -#### A masked pattern was here #### -POSTHOOK: query: select 
count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint) t1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@alltypesorc -#### A masked pattern was here #### -3152013 -PREHOOK: query: -- Base result for outer join -explain -select count(*) from -(select c.ctinyint - from alltypesorc c - left outer join alltypesorc cd - on cd.cint = c.cint) t1 -PREHOOK: type: QUERY -POSTHOOK: query: -- Base result for outer join -explain -select count(*) from -(select c.ctinyint - from alltypesorc c - left outer join alltypesorc cd - on cd.cint = c.cint) t1 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 3 - Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Map 3 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select count(*) from -(select c.ctinyint - from alltypesorc c - left outer join alltypesorc cd - on cd.cint = c.cint) t1 -PREHOOK: type: QUERY -PREHOOK: Input: default@alltypesorc -#### A masked pattern was here #### -POSTHOOK: query: select count(*) from -(select c.ctinyint - from alltypesorc c - left outer join alltypesorc cd - on cd.cint = c.cint) t1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@alltypesorc -#### A masked pattern was here #### -3155128 -PREHOOK: query: -- 32 partitions are created. 3 in memory, 29 on disk on creation. 
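
[Context, not part of the patch: the golden-file comments above record how many partitions each scenario creates — here, 32 partitions with 3 in memory and 29 spilled on creation. As a rough illustration of the arithmetic such numbers imply, the sketch below shows one way a grace-style join could round a size estimate up to a power-of-two partition count and route rows by a hash mask. Every identifier and constant is a hypothetical illustration, not Hive's implementation.]

public class GracePartitioningSketch {
  // Round (estimatedSize / memoryBudget) up to a power of two, honoring a floor.
  static int choosePartitionCount(long estimatedSize, long memoryBudget, int minPartitions) {
    long needed = Math.max(1L, (estimatedSize + memoryBudget - 1) / memoryBudget);
    int n = 1;
    while (n < needed) {
      n <<= 1;                                   // next power of two
    }
    return Math.max(n, minPartitions);
  }

  // With a power-of-two count, picking a key's partition is a cheap bit mask.
  static int partitionFor(int keyHash, int numPartitions) {
    return keyHash & (numPartitions - 1);
  }

  public static void main(String[] args) {
    // A 64MB build side against a 2MB budget needs 32 partitions.
    int parts = choosePartitionCount(64L << 20, 2L << 20, 16);
    System.out.println(parts + " partitions");   // prints "32 partitions"
    System.out.println(partitionFor("238".hashCode(), parts));
  }
}

[Keeping the count a power of two is what makes the per-row routing a single AND instead of a modulo.]
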
-explain -select count(*) from -(select c.ctinyint - from alltypesorc c - left outer join alltypesorc cd - on cd.cint = c.cint) t1 -PREHOOK: type: QUERY -POSTHOOK: query: -- 32 partitions are created. 3 in memory, 29 on disk on creation. -explain -select count(*) from -(select c.ctinyint - from alltypesorc c - left outer join alltypesorc cd - on cd.cint = c.cint) t1 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 3 - Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Map 3 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select count(*) from -(select c.ctinyint - from alltypesorc c - left outer join alltypesorc cd - on cd.cint = c.cint) t1 -PREHOOK: type: QUERY -PREHOOK: Input: default@alltypesorc -#### A masked pattern was here #### -POSTHOOK: query: select count(*) from -(select c.ctinyint - from alltypesorc c - left outer join alltypesorc cd - on cd.cint = c.cint) t1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@alltypesorc -#### A masked pattern was here #### -3155128 -PREHOOK: query: -- Partitioned table -create table parttbl (key string, value char(20)) partitioned by (dt char(10)) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@parttbl -POSTHOOK: query: -- Partitioned table -create table parttbl (key string, value char(20)) partitioned by (dt char(10)) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: 
database:default -POSTHOOK: Output: default@parttbl -PREHOOK: query: insert overwrite table parttbl partition(dt='2000-01-01') - select * from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@parttbl@dt=2000-01-01 -POSTHOOK: query: insert overwrite table parttbl partition(dt='2000-01-01') - select * from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@parttbl@dt=2000-01-01 -POSTHOOK: Lineage: parttbl PARTITION(dt=2000-01-01).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: parttbl PARTITION(dt=2000-01-01).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: insert overwrite table parttbl partition(dt='2000-01-02') - select * from src1 -PREHOOK: type: QUERY -PREHOOK: Input: default@src1 -PREHOOK: Output: default@parttbl@dt=2000-01-02 -POSTHOOK: query: insert overwrite table parttbl partition(dt='2000-01-02') - select * from src1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src1 -POSTHOOK: Output: default@parttbl@dt=2000-01-02 -POSTHOOK: Lineage: parttbl PARTITION(dt=2000-01-02).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: parttbl PARTITION(dt=2000-01-02).value EXPRESSION [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: -- No spill, base result -explain -select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -PREHOOK: type: QUERY -POSTHOOK: query: -- No spill, base result -explain -select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: p1 - Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - input vertices: - 1 Map 3 - Statistics: Num rows: 289 Data size: 6872 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Map 3 - Map Operator Tree: - TableScan - alias: p2 - Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File 
Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -PREHOOK: type: QUERY -PREHOOK: Input: default@parttbl -PREHOOK: Input: default@parttbl@dt=2000-01-01 -PREHOOK: Input: default@parttbl@dt=2000-01-02 -#### A masked pattern was here #### -POSTHOOK: query: select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@parttbl -POSTHOOK: Input: default@parttbl@dt=2000-01-01 -POSTHOOK: Input: default@parttbl@dt=2000-01-02 -#### A masked pattern was here #### -1217 -PREHOOK: query: -- No spill, 2 partitions created in memory -explain -select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -PREHOOK: type: QUERY -POSTHOOK: query: -- No spill, 2 partitions created in memory -explain -select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: p1 - Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - input vertices: - 1 Map 3 - Statistics: Num rows: 289 Data size: 6872 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Map 3 - Map Operator Tree: - TableScan - alias: p2 - Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -PREHOOK: type: QUERY -PREHOOK: Input: default@parttbl -PREHOOK: Input: default@parttbl@dt=2000-01-01 -PREHOOK: Input: default@parttbl@dt=2000-01-02 -#### A masked pattern was here #### -POSTHOOK: query: select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@parttbl -POSTHOOK: Input: default@parttbl@dt=2000-01-01 -POSTHOOK: Input: default@parttbl@dt=2000-01-02 -#### A masked pattern was here #### -1217 -PREHOOK: query: -- Spill case base result -explain -select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -PREHOOK: type: QUERY -POSTHOOK: query: -- Spill case base result -explain -select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: p1 - Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - input vertices: - 1 Map 3 - Statistics: Num rows: 289 Data size: 6872 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Map 3 - Map Operator Tree: - TableScan - alias: p2 - Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -PREHOOK: type: QUERY -PREHOOK: Input: default@parttbl -PREHOOK: Input: 
default@parttbl@dt=2000-01-01 -PREHOOK: Input: default@parttbl@dt=2000-01-02 -#### A masked pattern was here #### -POSTHOOK: query: select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@parttbl -POSTHOOK: Input: default@parttbl@dt=2000-01-01 -POSTHOOK: Input: default@parttbl@dt=2000-01-02 -#### A masked pattern was here #### -1217 -PREHOOK: query: -- Spill case, one partition in memory, one spilled on creation -explain -select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -PREHOOK: type: QUERY -POSTHOOK: query: -- Spill case, one partition in memory, one spilled on creation -explain -select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: p1 - Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - input vertices: - 1 Map 3 - Statistics: Num rows: 289 Data size: 6872 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Map 3 - Map Operator Tree: - TableScan - alias: p2 - Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -PREHOOK: type: QUERY -PREHOOK: Input: default@parttbl -PREHOOK: Input: default@parttbl@dt=2000-01-01 -PREHOOK: Input: default@parttbl@dt=2000-01-02 -#### A masked pattern was here #### -POSTHOOK: query: select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 
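
[Context, not part of the patch: the "spill case" plans above differ from the base plans only by the HybridGraceHashJoin: true marker; the interesting behavior is at runtime, where a big-table row that hashes to a spilled build partition cannot be joined immediately. The sketch below shows the shape of that probe-side decision under assumed, hypothetical names — an in-memory map standing in for the real hashtable and a list standing in for an on-disk side file. It is illustration only, not this patch's implementation.]

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class ProbeSideSketch {
  // partition id -> in-memory build hashtable (absent means spilled on creation)
  final Map<Integer, Map<String, String>> inMemoryBuild = new HashMap<>();
  // partition id -> probe rows deferred until that build partition is reloaded
  final Map<Integer, List<String>> deferredProbeRows = new HashMap<>();
  final int numPartitions;

  ProbeSideSketch(int numPartitions) { this.numPartitions = numPartitions; }

  void probe(String key) {
    int p = key.hashCode() & (numPartitions - 1);
    Map<String, String> hashTable = inMemoryBuild.get(p);
    if (hashTable != null) {
      String matched = hashTable.get(key);
      if (matched != null) {
        emit(key, matched);                      // joined in the first pass
      }
    } else {
      // Build partition p was spilled: buffer the probe row and re-process it
      // after that partition's hashtable is restored from disk.
      deferredProbeRows.computeIfAbsent(p, k -> new ArrayList<>()).add(key);
    }
  }

  void emit(String key, String value) {
    System.out.println(key + "\t" + value);
  }

  public static void main(String[] args) {
    ProbeSideSketch join = new ProbeSideSketch(2);
    join.inMemoryBuild.put(0, new HashMap<>()); // partition 0 stayed in memory
    join.inMemoryBuild.get(0).put("238", "val_238");
    join.probe("238");                          // joins now or is deferred, by hash
    join.probe("311");
    System.out.println("deferred: " + join.deferredProbeRows);
  }
}
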
-POSTHOOK: type: QUERY -POSTHOOK: Input: default@parttbl -POSTHOOK: Input: default@parttbl@dt=2000-01-01 -POSTHOOK: Input: default@parttbl@dt=2000-01-02 -#### A masked pattern was here #### -1217 -PREHOOK: query: drop table parttbl -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@parttbl -PREHOOK: Output: default@parttbl -POSTHOOK: query: drop table parttbl -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@parttbl -POSTHOOK: Output: default@parttbl -PREHOOK: query: -- Test vectorization --- Test case borrowed from vector_decimal_mapjoin.q -CREATE TABLE decimal_mapjoin STORED AS ORC AS - SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1, - CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2, - cint - FROM alltypesorc -PREHOOK: type: CREATETABLE_AS_SELECT -PREHOOK: Input: default@alltypesorc -PREHOOK: Output: database:default -PREHOOK: Output: default@decimal_mapjoin -POSTHOOK: query: -- Test vectorization --- Test case borrowed from vector_decimal_mapjoin.q -CREATE TABLE decimal_mapjoin STORED AS ORC AS - SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1, - CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2, - cint - FROM alltypesorc -POSTHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: Input: default@alltypesorc -POSTHOOK: Output: database:default -POSTHOOK: Output: default@decimal_mapjoin -PREHOOK: query: EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 - FROM decimal_mapjoin l - JOIN decimal_mapjoin r ON l.cint = r.cint - WHERE l.cint = 6981 -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 - FROM decimal_mapjoin l - JOIN decimal_mapjoin r ON l.cint = r.cint - WHERE l.cint = 6981 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez - Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: l - Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (cint = 6981) (type: boolean) - Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 6981 (type: int) - 1 6981 (type: int) - outputColumnNames: _col1, _col9 - input vertices: - 1 Map 2 - Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 6981 (type: int), 6981 (type: int), _col1 (type: decimal(20,10)), _col9 (type: decimal(23,14)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized - Map 2 - Map Operator Tree: - TableScan - alias: r - Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (cint = 6981) (type: boolean) - Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: 6981 (type: int) - sort order: + - Map-reduce partition columns: 6981 (type: 
int) - Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE - value expressions: cdecimal2 (type: decimal(23,14)) - Execution mode: vectorized - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 - FROM decimal_mapjoin l - JOIN decimal_mapjoin r ON l.cint = r.cint - WHERE l.cint = 6981 -PREHOOK: type: QUERY -PREHOOK: Input: default@decimal_mapjoin -#### A masked pattern was here #### -POSTHOOK: query: SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 - FROM decimal_mapjoin l - JOIN decimal_mapjoin r ON l.cint = r.cint - WHERE l.cint = 6981 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@decimal_mapjoin -#### A masked pattern was here #### -6981 6981 NULL NULL -6981 6981 NULL -617.5607769230769 -6981 6981 NULL -617.5607769230769 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL 6984454.211097692 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL -617.5607769230769 -6981 6981 NULL -617.5607769230769 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL 6984454.211097692 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL -617.5607769230769 -6981 6981 NULL -617.5607769230769 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL 6984454.211097692 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL -617.5607769230769 -6981 6981 NULL -617.5607769230769 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL 6984454.211097692 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 5831542.269248378 NULL -6981 6981 5831542.269248378 -617.5607769230769 -6981 6981 5831542.269248378 -617.5607769230769 -6981 6981 5831542.269248378 NULL -6981 6981 5831542.269248378 NULL -6981 6981 5831542.269248378 NULL -6981 6981 5831542.269248378 6984454.211097692 -6981 6981 5831542.269248378 NULL -6981 6981 5831542.269248378 NULL -6981 6981 5831542.269248378 NULL -6981 6981 NULL NULL -6981 6981 NULL -617.5607769230769 -6981 6981 NULL -617.5607769230769 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL 6984454.211097692 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL -617.5607769230769 -6981 6981 NULL -617.5607769230769 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL 6984454.211097692 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL -617.5607769230769 -6981 6981 NULL -617.5607769230769 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL 6984454.211097692 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 -515.621072973 NULL -6981 6981 -515.621072973 -617.5607769230769 -6981 6981 -515.621072973 -617.5607769230769 -6981 6981 -515.621072973 NULL -6981 6981 -515.621072973 NULL -6981 6981 -515.621072973 NULL -6981 6981 -515.621072973 6984454.211097692 -6981 6981 -515.621072973 NULL -6981 6981 -515.621072973 NULL -6981 6981 -515.621072973 NULL -6981 6981 -515.621072973 NULL -6981 6981 -515.621072973 -617.5607769230769 -6981 6981 -515.621072973 -617.5607769230769 -6981 6981 -515.621072973 NULL -6981 6981 -515.621072973 NULL -6981 6981 -515.621072973 NULL -6981 6981 -515.621072973 6984454.211097692 -6981 6981 -515.621072973 NULL -6981 
6981 -515.621072973 NULL -6981 6981 -515.621072973 NULL -PREHOOK: query: EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 - FROM decimal_mapjoin l - JOIN decimal_mapjoin r ON l.cint = r.cint - WHERE l.cint = 6981 -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 - FROM decimal_mapjoin l - JOIN decimal_mapjoin r ON l.cint = r.cint - WHERE l.cint = 6981 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez - Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: l - Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (cint = 6981) (type: boolean) - Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 6981 (type: int) - 1 6981 (type: int) - outputColumnNames: _col1, _col9 - input vertices: - 1 Map 2 - Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Select Operator - expressions: 6981 (type: int), 6981 (type: int), _col1 (type: decimal(20,10)), _col9 (type: decimal(23,14)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized - Map 2 - Map Operator Tree: - TableScan - alias: r - Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (cint = 6981) (type: boolean) - Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: 6981 (type: int) - sort order: + - Map-reduce partition columns: 6981 (type: int) - Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE - value expressions: cdecimal2 (type: decimal(23,14)) - Execution mode: vectorized - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 - FROM decimal_mapjoin l - JOIN decimal_mapjoin r ON l.cint = r.cint - WHERE l.cint = 6981 -PREHOOK: type: QUERY -PREHOOK: Input: default@decimal_mapjoin -#### A masked pattern was here #### -POSTHOOK: query: SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 - FROM decimal_mapjoin l - JOIN decimal_mapjoin r ON l.cint = r.cint - WHERE l.cint = 6981 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@decimal_mapjoin -#### A masked pattern was here #### -6981 6981 NULL NULL -6981 6981 NULL -617.5607769230769 -6981 6981 NULL -617.5607769230769 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL 6984454.211097692 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL -617.5607769230769 -6981 6981 NULL -617.5607769230769 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL 6984454.211097692 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL 
NULL -6981 6981 NULL -617.5607769230769 -6981 6981 NULL -617.5607769230769 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL 6984454.211097692 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL -617.5607769230769 -6981 6981 NULL -617.5607769230769 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL 6984454.211097692 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 5831542.269248378 NULL -6981 6981 5831542.269248378 -617.5607769230769 -6981 6981 5831542.269248378 -617.5607769230769 -6981 6981 5831542.269248378 NULL -6981 6981 5831542.269248378 NULL -6981 6981 5831542.269248378 NULL -6981 6981 5831542.269248378 6984454.211097692 -6981 6981 5831542.269248378 NULL -6981 6981 5831542.269248378 NULL -6981 6981 5831542.269248378 NULL -6981 6981 NULL NULL -6981 6981 NULL -617.5607769230769 -6981 6981 NULL -617.5607769230769 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL 6984454.211097692 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL -617.5607769230769 -6981 6981 NULL -617.5607769230769 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL 6984454.211097692 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL -617.5607769230769 -6981 6981 NULL -617.5607769230769 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL 6984454.211097692 -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 NULL NULL -6981 6981 -515.621072973 NULL -6981 6981 -515.621072973 -617.5607769230769 -6981 6981 -515.621072973 -617.5607769230769 -6981 6981 -515.621072973 NULL -6981 6981 -515.621072973 NULL -6981 6981 -515.621072973 NULL -6981 6981 -515.621072973 6984454.211097692 -6981 6981 -515.621072973 NULL -6981 6981 -515.621072973 NULL -6981 6981 -515.621072973 NULL -6981 6981 -515.621072973 NULL -6981 6981 -515.621072973 -617.5607769230769 -6981 6981 -515.621072973 -617.5607769230769 -6981 6981 -515.621072973 NULL -6981 6981 -515.621072973 NULL -6981 6981 -515.621072973 NULL -6981 6981 -515.621072973 6984454.211097692 -6981 6981 -515.621072973 NULL -6981 6981 -515.621072973 NULL -6981 6981 -515.621072973 NULL -PREHOOK: query: DROP TABLE decimal_mapjoin -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@decimal_mapjoin -PREHOOK: Output: default@decimal_mapjoin -POSTHOOK: query: DROP TABLE decimal_mapjoin -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@decimal_mapjoin -POSTHOOK: Output: default@decimal_mapjoin
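
[Context, not part of the patch: several scenarios in these golden files depend on noticing, mid-build, that memory is exhausted ("the one in memory will eventually exceed memory limit"). Checking a heap budget on every inserted row would be costly, so a natural design is to check only every N rows with N a power of two, reducing the test to a bit mask. The sketch below illustrates that cadence; the constants and names are assumptions for illustration, not values from this patch.]

public class MemoryCheckSketch {
  public static void main(String[] args) {
    final int checkEveryRows = 1024;        // assumed power of two
    final long memoryBudget = 64L << 20;    // hypothetical 64 MB build budget
    long bytesUsed = 0;

    for (long row = 1; row <= 5_000_000L; row++) {
      bytesUsed += 100;                     // pretend each build row costs ~100 bytes
      // Power-of-two cadence: the "every N rows" test is a single AND.
      if ((row & (checkEveryRows - 1)) == 0 && bytesUsed > memoryBudget) {
        System.out.println("memory exceeded at row " + row
            + "; a grace join would spill an in-memory partition here");
        bytesUsed = 0;                      // pretend the spill freed the budget
      }
    }
  }
}
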