diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 7cee344..c2b7778 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2811,9 +2811,17 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal "1. chosen : use VectorUDFAdaptor for a small set of UDFs that were choosen for good performance\n" + "2. all : use VectorUDFAdaptor for all UDFs" ), - HIVE_VECTORIZATION_PTF_ENABLED("hive.vectorized.execution.ptf.enabled", false, + HIVE_VECTORIZATION_PTF_ENABLED("hive.vectorized.execution.ptf.enabled", true, "This flag should be set to true to enable vectorized mode of the PTF of query execution.\n" + - "The default value is false."), + "The default value is true."), + + HIVE_VECTORIZATION_PTF_MAX_MEMORY_BUFFERING_BATCH_COUNT("hive.vectorized.ptf.max.memory.buffering.batch.count", 25, + "Maximum number of vectorized row batches to buffer in memory for PTF\n" + + "The default value is 25"), + HIVE_VECTORIZATION_TESTING_REDUCER_BATCH_SIZE("hive.vectorized.testing.reducer.batch.size", -1, + "internal use only, used for creating small group key vectorized row batches to exercise more logic\n" + + "The default value is -1 which means don't restrict for testing", + true), HIVE_VECTORIZATION_COMPLEX_TYPES_ENABLED("hive.vectorized.complex.types.enabled", true, "This flag should be set to true to enable vectorization\n" + diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java index 6523f00..ac4f5ab 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java @@ -42,7 +42,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorHashKeyWrapper; import org.apache.hadoop.hive.ql.exec.vector.VectorHashKeyWrapperBatch; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter; -import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinRowBytesContainer; +import org.apache.hadoop.hive.ql.exec.vector.rowbytescontainer.VectorRowBytesContainer; import org.apache.hadoop.hive.ql.io.HiveKey; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.HiveUtils; @@ -131,7 +131,7 @@ public long getEstimatedMemorySize() { BytesBytesMultiHashMap hashMap; // In memory hashMap KeyValueContainer sidefileKVContainer; // Stores small table key/value pairs ObjectContainer matchfileObjContainer; // Stores big table rows - VectorMapJoinRowBytesContainer matchfileRowBytesContainer; + VectorRowBytesContainer matchfileRowBytesContainer; // Stores big table rows as bytes for native vector map join. Path hashMapLocalPath; // Local file system path for spilled hashMap boolean hashMapOnDisk; // Status of hashMap. 
true: on disk, false: in memory @@ -216,9 +216,9 @@ public ObjectContainer getMatchfileObjContainer() { } /* Get the big table row bytes container for native vector map join */ - public VectorMapJoinRowBytesContainer getMatchfileRowBytesContainer() { + public VectorRowBytesContainer getMatchfileRowBytesContainer() { if (matchfileRowBytesContainer == null) { - matchfileRowBytesContainer = new VectorMapJoinRowBytesContainer(spillLocalDirs); + matchfileRowBytesContainer = new VectorRowBytesContainer(spillLocalDirs); } return matchfileRowBytesContainer; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java index f6f2dd0..9de3850 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java @@ -297,7 +297,8 @@ private void initializeSourceForTag(ReduceWork redWork, int tag, ObjectInspector boolean vectorizedRecordSource = (tag == bigTablePosition) && redWork.getVectorMode(); sources[tag].init(jconf, redWork.getReducer(), vectorizedRecordSource, keyTableDesc, valueTableDesc, reader, tag == bigTablePosition, (byte) tag, - redWork.getVectorizedRowBatchCtx(), redWork.getVectorizedVertexNum()); + redWork.getVectorizedRowBatchCtx(), redWork.getVectorizedVertexNum(), + redWork.getVectorizedTestingReducerBatchSize()); ois[tag] = sources[tag].getObjectInspector(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java index bdde81a..6cea754 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java @@ -121,13 +121,22 @@ private final GroupIterator groupIterator = new GroupIterator(); private long vectorizedVertexNum; + private int vectorizedTestingReducerBatchSize; void init(JobConf jconf, Operator reducer, boolean vectorized, TableDesc keyTableDesc, TableDesc valueTableDesc, Reader reader, boolean handleGroupKey, byte tag, - VectorizedRowBatchCtx batchContext, long vectorizedVertexNum) + VectorizedRowBatchCtx batchContext, long vectorizedVertexNum, + int vectorizedTestingReducerBatchSize) throws Exception { this.vectorizedVertexNum = vectorizedVertexNum; + if (vectorizedTestingReducerBatchSize > VectorizedRowBatch.DEFAULT_SIZE) { + + // For now, we don't go higher than the default batch size unless we do more work + // to verify every vectorized operator downstream can handle a larger batch size. + vectorizedTestingReducerBatchSize = VectorizedRowBatch.DEFAULT_SIZE; + } + this.vectorizedTestingReducerBatchSize = vectorizedTestingReducerBatchSize; ObjectInspector keyObjectInspector; this.reducer = reducer; @@ -417,7 +426,10 @@ private void processVectorGroup(BytesWritable keyWritable, VectorizedBatchUtil.setRepeatingColumn(batch, i); } - final int maxSize = batch.getMaxSize(); + final int maxSize = + (vectorizedTestingReducerBatchSize > 0 ? 
+ Math.min(vectorizedTestingReducerBatchSize, batch.getMaxSize()) : + batch.getMaxSize()); Preconditions.checkState(maxSize > 0); int rowIdx = 0; int batchBytes = keyBytes.length; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRow.java index be471c6..211622d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRow.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRow.java @@ -111,6 +111,22 @@ public void init(List typeNames) throws HiveException { vectorExtractRow.init(typeInfos); } + public void init(TypeInfo[] typeInfos) + throws HiveException { + + final int size = typeInfos.length; + this.typeInfos = Arrays.copyOf(typeInfos, size); + outputColumnNums = new int[size]; + objectInspectors = new ObjectInspector[size]; + for (int i = 0; i < size; i++) { + objectInspectors[i] = + TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfos[i]); + outputColumnNums[i] = i; + } + + vectorExtractRow.init(this.typeInfos, outputColumnNums); + } + public void init(TypeInfo[] typeInfos, int[] columnMap) throws HiveException { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java index bab5ee4..ecf4b9a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java @@ -42,6 +42,7 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.optimized.VectorMapJoinOptimizedCreateHashTable; +import org.apache.hadoop.hive.ql.exec.vector.rowbytescontainer.VectorRowBytesContainer; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.serde2.SerDeException; @@ -481,7 +482,7 @@ private void spillSerializeRow(VectorizedRowBatch batch, int batchIndex, HybridHashTableContainer ht = (HybridHashTableContainer) mapJoinTables[posSingleVectorMapJoinSmallTable]; HashPartition hp = ht.getHashPartitions()[partitionId]; - VectorMapJoinRowBytesContainer rowBytesContainer = hp.getMatchfileRowBytesContainer(); + VectorRowBytesContainer rowBytesContainer = hp.getMatchfileRowBytesContainer(); Output output = rowBytesContainer.getOuputForRowBytes(); // int offset = output.getLength(); bigTableVectorSerializeRow.setOutputAppend(output); @@ -568,7 +569,7 @@ protected void reProcessBigTable(int partitionId) int batchCount = 0; try { - VectorMapJoinRowBytesContainer bigTable = partition.getMatchfileRowBytesContainer(); + VectorRowBytesContainer bigTable = partition.getMatchfileRowBytesContainer(); bigTable.prepareForReading(); while (bigTable.readNext()) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinRowBytesContainer.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinRowBytesContainer.java deleted file mode 100644 index fa96ae9..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinRowBytesContainer.java +++ /dev/null @@ -1,321 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor 
license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.ql.exec.vector.mapjoin; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.apache.hadoop.fs.FileUtil; -import org.apache.hadoop.hive.common.FileUtils; -import org.apache.hadoop.hive.serde2.ByteStream.Output; - -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; - -/** - * An eager bytes container that puts row bytes to an output stream. - */ -public class VectorMapJoinRowBytesContainer { - - private static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinRowBytesContainer.class); - - private File parentDir; - private File tmpFile; - - // We buffer in a org.apache.hadoop.hive.serde2.ByteStream.Output since that is what - // is used by VectorSerializeRow / SerializeWrite. Periodically, we flush this buffer - // to disk. - private Output output; - private int rowBeginPos; - private static final int OUTPUT_SIZE = 4096; - private static final int THRESHOLD = 8 * (OUTPUT_SIZE / 10); - private static final int INPUT_SIZE = 4096; - - private FileOutputStream fileOutputStream; - - private boolean isOpen; - - private byte[] readBuffer; - private byte[] largeRowBuffer; - private int readOffset; - private int readLength; - - private int readNextCount; - private int readNextIndex; - - private static final int MAX_READS = 256; - private byte[][] readNextBytes; - private int readNextOffsets[]; - private int readNextLengths[]; - - private byte[] currentBytes; - private int currentOffset; - private int currentLength; - - private long totalWriteLength; - private long totalReadLength; - - private FileInputStream fileInputStream; - - private final String spillLocalDirs; - - public VectorMapJoinRowBytesContainer(String spillLocalDirs) { - output = new Output(); - readBuffer = new byte[INPUT_SIZE]; - readNextBytes = new byte[MAX_READS][]; - readNextOffsets = new int[MAX_READS]; - readNextLengths = new int[MAX_READS]; - isOpen = false; - totalWriteLength = 0; - totalReadLength = 0; - this.spillLocalDirs = spillLocalDirs; - } - - private void setupOutputFileStreams() throws IOException { - parentDir = FileUtils.createLocalDirsTempFile(spillLocalDirs, "bytes-container", "", true); - parentDir.deleteOnExit(); - tmpFile = File.createTempFile("BytesContainer", ".tmp", parentDir); - LOG.debug("BytesContainer created temp file " + tmpFile.getAbsolutePath()); - tmpFile.deleteOnExit(); - - fileOutputStream = new FileOutputStream(tmpFile); - } - - private void initFile() { - try { - setupOutputFileStreams(); - } catch (IOException e) { - throw new RuntimeException("Failed to create temporary output file on disk", e); - } - } - - public Output getOuputForRowBytes() { - if (!isOpen) { - initFile(); - isOpen = true; - } - // Reserve space for the int length. 
- output.reserve(4); - rowBeginPos = output.getLength(); - return output; - } - - public void finishRow() throws IOException { - int length = output.getLength() - rowBeginPos; - output.writeInt(rowBeginPos - 4, length); - if (output.getLength() > THRESHOLD) { - fileOutputStream.write(output.getData(), 0, output.getLength()); - totalWriteLength += output.getLength(); - output.reset(); - } - } - - public void prepareForReading() throws IOException { - if (!isOpen) { - return; - } - if (output.getLength() > 0) { - fileOutputStream.write(output.getData(), 0, output.getLength()); - totalWriteLength += output.getLength(); - fileOutputStream.flush(); - output.reset(); - } - if (fileInputStream != null) { - fileInputStream.close(); - } - fileInputStream = new FileInputStream(tmpFile); - readNextIndex = 0; - readNextCount = 0; - } - - private int readInt() { - int value = (((readBuffer[readOffset] & 0xFF) << 24) | - ((readBuffer[readOffset + 1] & 0xFF) << 16) | - ((readBuffer[readOffset + 2] & 0xFF) << 8) | - ((readBuffer[readOffset + 3] & 0xFF))); - readOffset += 4; - return value; - } - - // Call when nextReadIndex == nextReadCount. - private void bufferedRead() throws IOException { - - // Reset for reading. - readNextIndex = 0; - - // Reset for filling. - readNextCount = 0; - - if (readOffset < readLength) { - // Move unprocessed remainder to beginning of buffer. - int unprocessLength = readLength - readOffset; - System.arraycopy(readBuffer, readOffset, readBuffer, 0, unprocessLength); - - int maxReadLength = readBuffer.length - unprocessLength; - int partialReadLength = fileInputStream.read(readBuffer, unprocessLength, maxReadLength); - if (partialReadLength == -1) { - partialReadLength = 0; - } - totalReadLength += partialReadLength; - readLength = unprocessLength + partialReadLength; - readOffset = 0; - } else { - readOffset = 0; - readLength = fileInputStream.read(readBuffer, 0, readBuffer.length); - if (readLength == -1) { - readLength = 0; - } - totalReadLength += readLength; - } - if (readLength == 0) { - return; - } - if (readLength < 0) { - throw new IOException("Negative read length"); - } - - // Get length word. - if (readLength < 4) { - throw new IOException("Expecting 4 byte length"); - } - - while (true) { - // Use Input class to read length. - int saveReadOffset = readOffset; - int rowLength = readInt(); - if (rowLength < 0) { - throw new IOException("Negative row length"); - } - int remainingLength = readLength - readOffset; - if (remainingLength < rowLength) { - if (readNextCount > 0) { - // Leave this one for the next round. - readOffset = saveReadOffset; - break; - } - - // Buffer needed to bridge. 
- if (largeRowBuffer == null || largeRowBuffer.length < rowLength) { - int newLargeBufferLength = Math.max(Integer.highestOneBit(rowLength) << 1, INPUT_SIZE); - largeRowBuffer = new byte[newLargeBufferLength]; - } - System.arraycopy(readBuffer, readOffset, largeRowBuffer, 0, remainingLength); - int expectedPartialLength = rowLength - remainingLength; - int partialReadLength = fileInputStream.read(largeRowBuffer, remainingLength, expectedPartialLength); - if (partialReadLength == -1) { - throw new IOException("Unexpected EOF (total write length " + totalWriteLength + - ", total read length " + totalReadLength + ", read length " + - expectedPartialLength + ")"); - } - - if (expectedPartialLength != partialReadLength) { - throw new IOException("Unable to read a complete row of length " + rowLength + - " (total write length " + totalWriteLength + - ", total read length " + totalReadLength + ", read length " + - expectedPartialLength + ", actual length " + partialReadLength + ")"); - } - totalReadLength += partialReadLength; - - readNextBytes[readNextCount] = largeRowBuffer; - readNextOffsets[readNextCount] = 0; - readNextLengths[readNextCount] = rowLength; - - // Indicate we used the last row's bytes for large buffer. - readOffset = readLength; - readNextCount++; - break; - } - - readNextBytes[readNextCount] = readBuffer; - readNextOffsets[readNextCount] = readOffset; - readNextLengths[readNextCount] = rowLength; - readOffset += rowLength; - readNextCount++; - - if (readNextCount >= readNextBytes.length){ - break; - } - if (readLength - readOffset < 4) { - // Handle in next round. - break; - } - } - } - - public boolean readNext() throws IOException { - if (!isOpen) { - return false; - } - if (readNextIndex >= readNextCount) { - bufferedRead(); - // Any more left? 
- if (readNextIndex >= readNextCount) { - return false; - } - } - - currentBytes = readNextBytes[readNextIndex]; - currentOffset = readNextOffsets[readNextIndex]; - currentLength = readNextLengths[readNextIndex]; - - readNextIndex++; - return true; - } - - public byte[] currentBytes() { - return currentBytes; - } - - public int currentOffset() { - return currentOffset; - } - - public int currentLength() { - return currentLength; - } - - public void clear() { - if (fileInputStream != null) { - try { - fileInputStream.close(); - } catch (Throwable ignored) { - } - fileInputStream = null; - } - if (fileOutputStream != null) { - try { - fileOutputStream.close(); - } catch (Throwable ignored) { - } - fileOutputStream = null; - } - - if (parentDir != null) { - try { - FileUtil.fullyDelete(parentDir); - } catch (Throwable ignored) { - } - } - parentDir = null; - tmpFile = null; - isOpen = false; - totalWriteLength = 0; - } - -} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFGroupBatches.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFGroupBatches.java index a843f48..3db7a00 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFGroupBatches.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFGroupBatches.java @@ -18,17 +18,26 @@ package org.apache.hadoop.hive.ql.exec.vector.ptf; +import java.io.IOException; import java.util.ArrayList; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorDeserializeRow; +import org.apache.hadoop.hive.ql.exec.vector.VectorSerializeRow; import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.rowbytescontainer.VectorRowBytesContainer; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.HiveUtils; +import org.apache.hadoop.hive.serde2.ByteStream.Output; +import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead; +import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import com.google.common.base.Preconditions; @@ -42,10 +51,15 @@ private static final String CLASS_NAME = VectorPTFGroupBatches.class.getName(); private static final Log LOG = LogFactory.getLog(CLASS_NAME); + private Configuration hconf; + private VectorPTFEvaluatorBase[] evaluators; - private int[] outputColumnMap; + private int[] outputProjectionColumnMap; private int[] keyInputColumnMap; + + private int bufferedColumnCount; private int[] bufferedColumnMap; + private TypeInfo[] bufferedTypeInfos; private ArrayList bufferedBatches; @@ -54,31 +68,101 @@ private int allocatedBufferedBatchCount; private int currentBufferedBatchCount; - public VectorPTFGroupBatches() { + private int spillLimitBufferedBatchCount; + private boolean didSpillToDisk; + private String spillLocalDirs; + private long spillRowCount; + private VectorRowBytesContainer spillRowBytesContainer; + + private transient VectorSerializeRow bufferedBatchVectorSerializeRow; + private transient VectorDeserializeRow 
bufferedBatchVectorDeserializeRow; + + public VectorPTFGroupBatches(Configuration hconf, int vectorizedPTFMaxMemoryBufferingBatchCount) { + this.hconf = hconf; allocatedBufferedBatchCount = 0; currentBufferedBatchCount = 0; + + spillLocalDirs = HiveUtils.getLocalDirList(hconf); + + // Cannot be 0. + spillLimitBufferedBatchCount = Math.max(1, vectorizedPTFMaxMemoryBufferingBatchCount); + + didSpillToDisk = false; + spillLocalDirs = null; + spillRowBytesContainer = null; + bufferedBatchVectorSerializeRow = null; + bufferedBatchVectorDeserializeRow = null; } - public void init(VectorPTFEvaluatorBase[] evaluators, int[] outputColumnMap, - int[] keyInputColumnMap, int[] nonKeyInputColumnMap, int[] streamingColumnMap, + public void init( + TypeInfo[] reducerBatchTypeInfos, + VectorPTFEvaluatorBase[] evaluators, + int[] outputProjectionColumnMap, TypeInfo[] outputTypeInfos, + int[] keyInputColumnMap, int[] nonKeyInputColumnMap, int[] streamingEvaluatorNums, VectorizedRowBatch overflowBatch) { this.evaluators = evaluators; - this.outputColumnMap = outputColumnMap; + this.outputProjectionColumnMap = outputProjectionColumnMap; this.keyInputColumnMap = keyInputColumnMap; + + /* + * If we have more than one group key batch, we will buffer their contents. + * We don't buffer the key columns since they are a constant for the group key. + * + * We buffer the non-key input columns. And, we buffer any streaming columns that will already + * have their output values. + */ final int nonKeyInputColumnCount = nonKeyInputColumnMap.length; - final int streamingColumnCount = streamingColumnMap.length; - final int bufferedColumnCount = nonKeyInputColumnCount + streamingColumnCount; + final int streamingEvaluatorCount = streamingEvaluatorNums.length; + bufferedColumnCount = nonKeyInputColumnCount + streamingEvaluatorCount; bufferedColumnMap = new int[bufferedColumnCount]; + bufferedTypeInfos = new TypeInfo[bufferedColumnCount]; for (int i = 0; i < nonKeyInputColumnCount; i++) { - bufferedColumnMap[i] = nonKeyInputColumnMap[i]; + final int columnNum = nonKeyInputColumnMap[i]; + bufferedColumnMap[i] = columnNum; + bufferedTypeInfos[i] = reducerBatchTypeInfos[columnNum]; } - for (int i = nonKeyInputColumnCount; i < bufferedColumnCount; i++) { - bufferedColumnMap[i] = streamingColumnMap[i - nonKeyInputColumnCount]; + + for (int i = 0; i < streamingEvaluatorCount; i++) { + final int streamingEvaluatorNum = streamingEvaluatorNums[i]; + final int bufferedMapIndex = nonKeyInputColumnCount + i; + bufferedColumnMap[bufferedMapIndex] = outputProjectionColumnMap[streamingEvaluatorNum]; + bufferedTypeInfos[bufferedMapIndex] = outputTypeInfos[streamingEvaluatorNum]; } this.overflowBatch = overflowBatch; bufferedBatches = new ArrayList(0); } + private VectorRowBytesContainer getSpillRowBytesContainer() throws HiveException { + if (spillRowBytesContainer == null) { + spillRowBytesContainer = new VectorRowBytesContainer(spillLocalDirs); + + if (bufferedBatchVectorSerializeRow == null) { + bufferedBatchVectorSerializeRow = + new VectorSerializeRow( + new LazyBinarySerializeWrite(bufferedColumnMap.length)); + + // Deserialize just the columns we a buffered batch, which has only the non-key inputs and + // streamed column outputs. + bufferedBatchVectorSerializeRow.init(bufferedTypeInfos); + + bufferedBatchVectorDeserializeRow = + new VectorDeserializeRow( + new LazyBinaryDeserializeRead( + bufferedTypeInfos, + /* useExternalBuffer */ true)); + + // Deserialize the fields into the *overflow* batch using the buffered batch column map. 
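
(Illustrative sketch, not part of the patch itself.) The serialize/deserialize pair initialized above is what round-trips a buffered PTF row through LazyBinary encoding and the spill file. A minimal sketch of that flow, condensed from the patch's own bufferGroupBatch / forwardSpilledBatches code, with error handling omitted:

    // Spill side: append one row of the evicted buffered batch to the bytes container.
    Output output = spillRowBytesContainer.getOuputForRowBytes();
    bufferedBatchVectorSerializeRow.setOutputAppend(output);
    bufferedBatchVectorSerializeRow.serializeWrite(oldestBufferedBatch, batchIndex);
    spillRowBytesContainer.finishRow();

    // Replay side: read the spilled rows back and materialize them into the overflow
    // batch through the bufferedColumnMap set up above.
    spillRowBytesContainer.prepareForReading();
    while (spillRowBytesContainer.readNext()) {
      bufferedBatchVectorDeserializeRow.setBytes(
          spillRowBytesContainer.currentBytes(),
          spillRowBytesContainer.currentOffset(),
          spillRowBytesContainer.currentLength());
      bufferedBatchVectorDeserializeRow.deserialize(overflowBatch, overflowBatch.size);
      overflowBatch.size++;
    }
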
+ bufferedBatchVectorDeserializeRow.init(bufferedColumnMap); + } + } + return spillRowBytesContainer; + } + + private void releaseSpillRowBytesContainer() { + spillRowBytesContainer.clear(); + spillRowBytesContainer = null; + } + public void evaluateStreamingGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) { // Streaming evaluators fill in their results during the evaluate call. @@ -135,25 +219,93 @@ private void forwardBufferedBatches(VectorPTFOperator vecPTFOperator, int index) // buffered VRBs without other columns or scratch columns. VectorizedBatchUtil.swapColumnVector( bufferedBatch, i, overflowBatch, bufferedColumnMap[i]); + } + overflowBatch.size = bufferedBatch.size; + fillGroupResults(overflowBatch); + vecPTFOperator.forward(overflowBatch, null); + } + + private void forwardSpilledBatches(VectorPTFOperator vecPTFOperator, VectorizedRowBatch lastBatch) + throws HiveException { + + overflowBatch.reset(); + copyPartitionAndOrderColumnsToOverflow(lastBatch); + + long spillRowsRead = 0; + try { + VectorRowBytesContainer rowBytesContainer = getSpillRowBytesContainer(); + rowBytesContainer.prepareForReading(); - overflowBatch.size = bufferedBatch.size; - fillGroupResults(overflowBatch); - vecPTFOperator.forward(overflowBatch, null); + while (rowBytesContainer.readNext()) { + + byte[] bytes = rowBytesContainer.currentBytes(); + int offset = rowBytesContainer.currentOffset(); + int length = rowBytesContainer.currentLength(); + + bufferedBatchVectorDeserializeRow.setBytes(bytes, offset, length); + try { + bufferedBatchVectorDeserializeRow.deserialize(overflowBatch, overflowBatch.size); + } catch (Exception e) { + throw new HiveException( + "\nDeserializeRead detail: " + + bufferedBatchVectorDeserializeRow.getDetailedReadPositionString(), + e); + } + overflowBatch.size++; + spillRowsRead++; + + if (overflowBatch.size == VectorizedRowBatch.DEFAULT_SIZE) { + + fillGroupResults(overflowBatch); + vecPTFOperator.forward(overflowBatch, null); + + overflowBatch.reset(); + copyPartitionAndOrderColumnsToOverflow(lastBatch); + } + } + // Process the row batch that has less than DEFAULT_SIZE rows + if (overflowBatch.size > 0) { + + fillGroupResults(overflowBatch); + vecPTFOperator.forward(overflowBatch, null); + + overflowBatch.reset(); + copyPartitionAndOrderColumnsToOverflow(lastBatch); + } + Preconditions.checkState(spillRowsRead == spillRowCount); + + // For now, throw away file. + releaseSpillRowBytesContainer(); + + } catch (Exception e) { + throw new HiveException(e); + } + } + + private void copyPartitionAndOrderColumnsToOverflow(VectorizedRowBatch lastBatch) { + + // Set partition and order columns in overflowBatch. + // We can set by ref since our last batch is held by us. + final int keyInputColumnCount = keyInputColumnMap.length; + for (int i = 0; i < keyInputColumnCount; i++) { + final int keyColumnNum = keyInputColumnMap[i]; + Preconditions.checkState(overflowBatch.cols[keyColumnNum] != null); + VectorizedBatchUtil.copyRepeatingColumn( + lastBatch, keyColumnNum, overflowBatch, keyColumnNum, /* setByValue */ false); } } public void fillGroupResultsAndForward(VectorPTFOperator vecPTFOperator, VectorizedRowBatch lastBatch) throws HiveException { - if (currentBufferedBatchCount > 0) { - - // Set partition and order columns in overflowBatch. - // We can set by ref since our last batch is held by us. 
- final int keyInputColumnCount = keyInputColumnMap.length; - for (int i = 0; i < keyInputColumnCount; i++) { - VectorizedBatchUtil.copyRepeatingColumn(lastBatch, i, overflowBatch, i, /* setByValue */ false); - } + if (didSpillToDisk) { + forwardSpilledBatches(vecPTFOperator, lastBatch); + didSpillToDisk = false; + } + if (currentBufferedBatchCount > 0) { + overflowBatch.reset(); + copyPartitionAndOrderColumnsToOverflow(lastBatch); for (int i = 0; i < currentBufferedBatchCount; i++) { forwardBufferedBatches(vecPTFOperator, i); } @@ -167,8 +319,8 @@ public void fillGroupResultsAndForward(VectorPTFOperator vecPTFOperator, int originalProjectionSize = lastBatch.projectionSize; // Project with the output of our operator. - lastBatch.projectionSize = outputColumnMap.length; - lastBatch.projectedColumns = outputColumnMap; + lastBatch.projectionSize = outputProjectionColumnMap.length; + lastBatch.projectedColumns = outputProjectionColumnMap; vecPTFOperator.forward(lastBatch, null); @@ -197,20 +349,62 @@ private VectorizedRowBatch newBufferedBatch(VectorizedRowBatch batch) throws Hiv public void bufferGroupBatch(VectorizedRowBatch batch) throws HiveException { - final int bufferedColumnCount = bufferedColumnMap.length; - if (allocatedBufferedBatchCount <= currentBufferedBatchCount) { - VectorizedRowBatch newBatch = newBufferedBatch(batch); - bufferedBatches.add(newBatch); - allocatedBufferedBatchCount++; - } + try { + // When we've buffered the max allowed, spill the oldest one to make space. + if (currentBufferedBatchCount >= spillLimitBufferedBatchCount) { - VectorizedRowBatch bufferedBatch = bufferedBatches.get(currentBufferedBatchCount++); + VectorRowBytesContainer rowBytesContainer = getSpillRowBytesContainer(); - for (int i = 0; i < bufferedColumnCount; i++) { - VectorizedBatchUtil.swapColumnVector( - batch, bufferedColumnMap[i], bufferedBatch, i); - } + if (!didSpillToDisk) { + // UNDONE: Don't reuse for now. + // rowBytesContainer.resetWrite(); + didSpillToDisk = true; + spillRowCount = 0; + } + + // Grab the oldest in-memory buffered batch and dump it to disk. + VectorizedRowBatch oldestBufferedBatch = bufferedBatches.remove(0); + + final boolean selectedInUse = oldestBufferedBatch.selectedInUse; + int[] selected = oldestBufferedBatch.selected; + final int size = oldestBufferedBatch.size; + for (int logicalIndex = 0; logicalIndex < size; logicalIndex++) { + final int batchIndex = (selectedInUse ? selected[logicalIndex] : logicalIndex); + + Output output = rowBytesContainer.getOuputForRowBytes(); + bufferedBatchVectorSerializeRow.setOutputAppend(output); + bufferedBatchVectorSerializeRow.serializeWrite(oldestBufferedBatch, batchIndex); + rowBytesContainer.finishRow(); + spillRowCount++; + } - bufferedBatch.size = batch.size; + // Put now available buffered batch at end. + oldestBufferedBatch.reset(); + bufferedBatches.add(oldestBufferedBatch); + currentBufferedBatchCount--; + } + + final int bufferedColumnCount = bufferedColumnMap.length; + if (allocatedBufferedBatchCount <= currentBufferedBatchCount) { + VectorizedRowBatch newBatch = newBufferedBatch(batch); + bufferedBatches.add(newBatch); + allocatedBufferedBatchCount++; + } + + VectorizedRowBatch bufferedBatch = bufferedBatches.get(currentBufferedBatchCount++); + + /* + * For better performance, rather than copy the contents of the column vectors we want, + * we swap for them. We assume the Reducer doesn't care. 
+ */ + for (int i = 0; i < bufferedColumnCount; i++) { + VectorizedBatchUtil.swapColumnVector( + batch, bufferedColumnMap[i], bufferedBatch, i); + } + + bufferedBatch.size = batch.size; + } catch (IOException e) { + throw new HiveException(e); + } } } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFOperator.java index 7522624..0d72ba8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFOperator.java @@ -97,10 +97,9 @@ * PTF vector expressions. */ - // This is map of which vectorized row batch columns are the input columns and the group value - // (aggregation) output columns. - // And, their types. - private int[] outputColumnMap; + private TypeInfo[] reducerBatchTypeInfos; + + private int[] outputProjectionColumnMap; private String[] outputColumnNames; private TypeInfo[] outputTypeInfos; @@ -135,7 +134,7 @@ private transient VectorPTFEvaluatorBase[] evaluators; - private transient int[] streamingColumnMap; + private transient int[] streamingEvaluatorNums; private transient boolean allEvaluatorsAreStreaming; @@ -179,11 +178,13 @@ public VectorPTFOperator(CompilationOpContext ctx, vectorPTFInfo = vectorDesc.getVectorPTFInfo(); this.vContext = vContext; + reducerBatchTypeInfos = vectorDesc.getReducerBatchTypeInfos(); + isPartitionOrderBy = vectorDesc.getIsPartitionOrderBy(); outputColumnNames = vectorDesc.getOutputColumnNames(); outputTypeInfos = vectorDesc.getOutputTypeInfos(); - outputColumnMap = vectorPTFInfo.getOutputColumnMap(); + outputProjectionColumnMap = vectorPTFInfo.getOutputColumnMap(); /* * Create a new vectorization context to create a new projection, but keep @@ -222,7 +223,7 @@ protected void setupVOutContext() { final int count = outputColumnNames.length; for (int i = 0; i < count; ++i) { String columnName = outputColumnNames[i]; - int outputColumn = outputColumnMap[i]; + int outputColumn = outputProjectionColumnMap[i]; vOutContext.addProjectionColumn(columnName, outputColumn); } } @@ -255,8 +256,8 @@ protected VectorizedRowBatch setupOverflowBatch() throws HiveException { overflowBatch = new VectorizedRowBatch(totalNumColumns); // First, just allocate just the output columns we will be using. 
- for (int i = 0; i < outputColumnMap.length; i++) { - int outputColumn = outputColumnMap[i]; + for (int i = 0; i < outputProjectionColumnMap.length; i++) { + int outputColumn = outputProjectionColumnMap[i]; String typeName = outputTypeInfos[i].getTypeName(); allocateOverflowBatchColumnVector(overflowBatch, outputColumn, typeName); } @@ -267,8 +268,8 @@ protected VectorizedRowBatch setupOverflowBatch() throws HiveException { allocateOverflowBatchColumnVector(overflowBatch, outputColumn++, typeName); } - overflowBatch.projectedColumns = outputColumnMap; - overflowBatch.projectionSize = outputColumnMap.length; + overflowBatch.projectedColumns = outputProjectionColumnMap; + overflowBatch.projectionSize = outputProjectionColumnMap.length; overflowBatch.reset(); @@ -311,18 +312,26 @@ protected void initializeOp(Configuration hconf) throws HiveException { evaluators = VectorPTFDesc.getEvaluators(vectorDesc, vectorPTFInfo); - streamingColumnMap = VectorPTFDesc.getStreamingColumnMap(evaluators); + streamingEvaluatorNums = VectorPTFDesc.getStreamingEvaluatorNums(evaluators); - allEvaluatorsAreStreaming = (streamingColumnMap.length == evaluatorCount); + allEvaluatorsAreStreaming = (streamingEvaluatorNums.length == evaluatorCount); /* * Setup the overflow batch. */ overflowBatch = setupOverflowBatch(); - groupBatches = new VectorPTFGroupBatches(); + groupBatches = new VectorPTFGroupBatches( + hconf, vectorDesc.getVectorizedPTFMaxMemoryBufferingBatchCount()); groupBatches.init( - evaluators, outputColumnMap, keyInputColumnMap, nonKeyInputColumnMap, streamingColumnMap, overflowBatch); + reducerBatchTypeInfos, + evaluators, + outputProjectionColumnMap, + outputTypeInfos, + keyInputColumnMap, + nonKeyInputColumnMap, + streamingEvaluatorNums, + overflowBatch); isFirstPartition = true; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/rowbytescontainer/VectorRowBytesContainer.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/rowbytescontainer/VectorRowBytesContainer.java new file mode 100644 index 0000000..4b801cc --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/rowbytescontainer/VectorRowBytesContainer.java @@ -0,0 +1,331 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.exec.vector.rowbytescontainer; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.hive.common.FileUtils; +import org.apache.hadoop.hive.serde2.ByteStream.Output; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; + +/** + * An eager bytes container that puts row bytes to an output stream. 
+ */ +public class VectorRowBytesContainer { + + private static final Logger LOG = LoggerFactory.getLogger(VectorRowBytesContainer.class); + + private File parentDir; + private File tmpFile; + + // We buffer in a org.apache.hadoop.hive.serde2.ByteStream.Output since that is what + // is used by VectorSerializeRow / SerializeWrite. Periodically, we flush this buffer + // to disk. + private Output output; + private int rowBeginPos; + private static final int OUTPUT_SIZE = 4096; + private static final int THRESHOLD = 8 * (OUTPUT_SIZE / 10); + private static final int INPUT_SIZE = 4096; + + private FileOutputStream fileOutputStream; + + private boolean isOpen; + + private byte[] readBuffer; + private byte[] largeRowBuffer; + private int readOffset; + private int readLength; + + private int readNextCount; + private int readNextIndex; + + private static final int MAX_READS = 256; + private byte[][] readNextBytes; + private int readNextOffsets[]; + private int readNextLengths[]; + + private byte[] currentBytes; + private int currentOffset; + private int currentLength; + + private long totalWriteLength; + private long totalReadLength; + + private FileInputStream fileInputStream; + + private final String spillLocalDirs; + + public VectorRowBytesContainer(String spillLocalDirs) { + output = new Output(); + readBuffer = new byte[INPUT_SIZE]; + readNextBytes = new byte[MAX_READS][]; + readNextOffsets = new int[MAX_READS]; + readNextLengths = new int[MAX_READS]; + isOpen = false; + totalWriteLength = 0; + totalReadLength = 0; + this.spillLocalDirs = spillLocalDirs; + } + + private void setupOutputFileStreams() throws IOException { + parentDir = FileUtils.createLocalDirsTempFile(spillLocalDirs, "bytes-container", "", true); + parentDir.deleteOnExit(); + tmpFile = File.createTempFile("BytesContainer", ".tmp", parentDir); + LOG.debug("BytesContainer created temp file " + tmpFile.getAbsolutePath()); + tmpFile.deleteOnExit(); + + fileOutputStream = new FileOutputStream(tmpFile); + } + + private void initFile() { + try { + setupOutputFileStreams(); + } catch (IOException e) { + throw new RuntimeException("Failed to create temporary output file on disk", e); + } + } + + public Output getOuputForRowBytes() { + if (!isOpen) { + initFile(); + isOpen = true; + } + // Reserve space for the int length. + output.reserve(4); + rowBeginPos = output.getLength(); + return output; + } + + public void finishRow() throws IOException { + int length = output.getLength() - rowBeginPos; + output.writeInt(rowBeginPos - 4, length); + if (output.getLength() > THRESHOLD) { + fileOutputStream.write(output.getData(), 0, output.getLength()); + totalWriteLength += output.getLength(); + output.reset(); + } + } + + public void prepareForReading() throws IOException { + if (!isOpen) { + return; + } + if (output.getLength() > 0) { + fileOutputStream.write(output.getData(), 0, output.getLength()); + totalWriteLength += output.getLength(); + fileOutputStream.flush(); + output.reset(); + } + if (fileInputStream != null) { + fileInputStream.close(); + } + fileInputStream = new FileInputStream(tmpFile); + readNextIndex = 0; + readNextCount = 0; + } + + private int readInt() { + int value = (((readBuffer[readOffset] & 0xFF) << 24) | + ((readBuffer[readOffset + 1] & 0xFF) << 16) | + ((readBuffer[readOffset + 2] & 0xFF) << 8) | + ((readBuffer[readOffset + 3] & 0xFF))); + readOffset += 4; + return value; + } + + // Call when nextReadIndex == nextReadCount. + private void bufferedRead() throws IOException { + + // Reset for reading. 
+ readNextIndex = 0; + + // Reset for filling. + readNextCount = 0; + + if (readOffset < readLength) { + // Move unprocessed remainder to beginning of buffer. + int unprocessLength = readLength - readOffset; + System.arraycopy(readBuffer, readOffset, readBuffer, 0, unprocessLength); + + int maxReadLength = readBuffer.length - unprocessLength; + int partialReadLength = fileInputStream.read(readBuffer, unprocessLength, maxReadLength); + if (partialReadLength == -1) { + partialReadLength = 0; + } + totalReadLength += partialReadLength; + readLength = unprocessLength + partialReadLength; + readOffset = 0; + } else { + readOffset = 0; + readLength = fileInputStream.read(readBuffer, 0, readBuffer.length); + if (readLength == -1) { + readLength = 0; + } + totalReadLength += readLength; + } + if (readLength == 0) { + return; + } + if (readLength < 0) { + throw new IOException("Negative read length"); + } + + // Get length word. + if (readLength < 4) { + throw new IOException("Expecting 4 byte length"); + } + + while (true) { + // Use Input class to read length. + int saveReadOffset = readOffset; + int rowLength = readInt(); + if (rowLength < 0) { + throw new IOException("Negative row length"); + } + int remainingLength = readLength - readOffset; + if (remainingLength < rowLength) { + if (readNextCount > 0) { + // Leave this one for the next round. + readOffset = saveReadOffset; + break; + } + + // Buffer needed to bridge. + if (largeRowBuffer == null || largeRowBuffer.length < rowLength) { + int newLargeBufferLength = Math.max(Integer.highestOneBit(rowLength) << 1, INPUT_SIZE); + largeRowBuffer = new byte[newLargeBufferLength]; + } + System.arraycopy(readBuffer, readOffset, largeRowBuffer, 0, remainingLength); + int expectedPartialLength = rowLength - remainingLength; + int partialReadLength = fileInputStream.read(largeRowBuffer, remainingLength, expectedPartialLength); + if (partialReadLength == -1) { + throw new IOException("Unexpected EOF (total write length " + totalWriteLength + + ", total read length " + totalReadLength + ", read length " + + expectedPartialLength + ")"); + } + + if (expectedPartialLength != partialReadLength) { + throw new IOException("Unable to read a complete row of length " + rowLength + + " (total write length " + totalWriteLength + + ", total read length " + totalReadLength + ", read length " + + expectedPartialLength + ", actual length " + partialReadLength + ")"); + } + totalReadLength += partialReadLength; + + readNextBytes[readNextCount] = largeRowBuffer; + readNextOffsets[readNextCount] = 0; + readNextLengths[readNextCount] = rowLength; + + // Indicate we used the last row's bytes for large buffer. + readOffset = readLength; + readNextCount++; + break; + } + + readNextBytes[readNextCount] = readBuffer; + readNextOffsets[readNextCount] = readOffset; + readNextLengths[readNextCount] = rowLength; + readOffset += rowLength; + readNextCount++; + + if (readNextCount >= readNextBytes.length){ + break; + } + if (readLength - readOffset < 4) { + // Handle in next round. + break; + } + } + } + + public boolean readNext() throws IOException { + if (!isOpen) { + return false; + } + if (readNextIndex >= readNextCount) { + bufferedRead(); + // Any more left? 
+ if (readNextIndex >= readNextCount) { + return false; + } + } + + currentBytes = readNextBytes[readNextIndex]; + currentOffset = readNextOffsets[readNextIndex]; + currentLength = readNextLengths[readNextIndex]; + + readNextIndex++; + return true; + } + + public byte[] currentBytes() { + return currentBytes; + } + + public int currentOffset() { + return currentOffset; + } + + public int currentLength() { + return currentLength; + } + + public void resetWrite() throws IOException { + if (!isOpen) { + return; + } + + // Truncate by re-opening FileOutputStream. + fileOutputStream.close(); + fileOutputStream = new FileOutputStream(tmpFile); + } + + public void clear() { + if (fileInputStream != null) { + try { + fileInputStream.close(); + } catch (Throwable ignored) { + } + fileInputStream = null; + } + if (fileOutputStream != null) { + try { + fileOutputStream.close(); + } catch (Throwable ignored) { + } + fileOutputStream = null; + } + + if (parentDir != null) { + try { + FileUtil.fullyDelete(parentDir); + } catch (Throwable ignored) { + } + } + parentDir = null; + tmpFile = null; + isOpen = false; + totalWriteLength = 0; + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 13d7730..c839f7f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -294,6 +294,8 @@ private boolean isVectorizationGroupByComplexTypesEnabled; private boolean isVectorizedRowIdentifierEnabled; private Collection> rowDeserializeInputFormatExcludes; + private int vectorizedPTFMaxMemoryBufferingBatchCount; + private int vectorizedTestingReducerBatchSize; private boolean isSchemaEvolution; @@ -1265,6 +1267,7 @@ private void convertReduceWork(ReduceWork reduceWork) throws SemanticException { vectorTaskColumnInfo.assume(); reduceWork.setVectorizedVertexNum(++vectorizedVertexNum); + reduceWork.setVectorizedTestingReducerBatchSize(vectorizedTestingReducerBatchSize); boolean ret; try { @@ -1379,7 +1382,8 @@ private boolean validateReduceWork(ReduceWork reduceWork, } // Now check the reduce operator tree. Map opRules = new LinkedHashMap(); - ReduceWorkValidationNodeProcessor vnp = new ReduceWorkValidationNodeProcessor(); + ReduceWorkValidationNodeProcessor vnp = + new ReduceWorkValidationNodeProcessor(vectorTaskColumnInfo); addReduceWorkRules(opRules, vnp); Dispatcher disp = new DefaultRuleDispatcher(vnp, opRules, null); GraphWalker ogw = new DefaultGraphWalker(disp); @@ -1529,6 +1533,14 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, class ReduceWorkValidationNodeProcessor implements NodeProcessor { + private final VectorTaskColumnInfo vectorTaskColumnInfo; + private final TypeInfo[] reducerBatchTypeInfos; + + public ReduceWorkValidationNodeProcessor(VectorTaskColumnInfo vectorTaskColumnInfo) { + this.vectorTaskColumnInfo = vectorTaskColumnInfo; + reducerBatchTypeInfos = vectorTaskColumnInfo.allTypeInfos.toArray(new TypeInfo[0]); + } + // Children of Vectorized GROUPBY that outputs rows instead of vectorized row batchs. 
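
(Illustrative sketch, not part of the patch itself; names are the ones introduced by this patch, wiring simplified.) For orientation, this is roughly how the two new settings travel from HiveConf down to the runtime objects the patch touches:

    // Read by the Vectorizer alongside the other vectorization settings.
    vectorizedPTFMaxMemoryBufferingBatchCount = HiveConf.getIntVar(hiveConf,
        HiveConf.ConfVars.HIVE_VECTORIZATION_PTF_MAX_MEMORY_BUFFERING_BATCH_COUNT);
    vectorizedTestingReducerBatchSize = HiveConf.getIntVar(hiveConf,
        HiveConf.ConfVars.HIVE_VECTORIZATION_TESTING_REDUCER_BATCH_SIZE);

    // Testing batch size: Vectorizer -> ReduceWork -> ReduceRecordSource, where it is
    // clamped to VectorizedRowBatch.DEFAULT_SIZE before use.
    reduceWork.setVectorizedTestingReducerBatchSize(vectorizedTestingReducerBatchSize);

    // PTF buffering limit: Vectorizer -> VectorPTFDesc -> VectorPTFOperator, which
    // hands it (raised to at least 1) to VectorPTFGroupBatches.
    vectorPTFDesc.setVectorizedPTFMaxMemoryBufferingBatchCount(
        vectorizedPTFMaxMemoryBufferingBatchCount);
    groupBatches = new VectorPTFGroupBatches(
        hconf, vectorDesc.getVectorizedPTFMaxMemoryBufferingBatchCount());
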
protected final Set> nonVectorizedOps = new HashSet>(); @@ -1550,7 +1562,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, return new Boolean(true); } currentOperator = op; - boolean ret = validateReduceWorkOperator(op); + boolean ret = validateReduceWorkOperator(op, reducerBatchTypeInfos); if (!ret) { return new Boolean(false); } @@ -1849,6 +1861,13 @@ public PhysicalContext resolve(PhysicalContext physicalContext) throws SemanticE HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_ROW_IDENTIFIER_ENABLED); + vectorizedPTFMaxMemoryBufferingBatchCount = + HiveConf.getIntVar(hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_PTF_MAX_MEMORY_BUFFERING_BATCH_COUNT); + vectorizedTestingReducerBatchSize = + HiveConf.getIntVar(hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_TESTING_REDUCER_BATCH_SIZE); + isSchemaEvolution = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_SCHEMA_EVOLUTION); @@ -1940,7 +1959,8 @@ boolean validateMapWorkOperator(Operator op, MapWork mWo return ret; } - boolean validateReduceWorkOperator(Operator op) { + boolean validateReduceWorkOperator(Operator op, + TypeInfo[] reducerBatchTypeInfos) { boolean ret; switch (op.getType()) { case MAPJOIN: @@ -1985,7 +2005,8 @@ boolean validateReduceWorkOperator(Operator op) { validateSparkHashTableSinkOperator((SparkHashTableSinkOperator) op); break; case PTF: - ret = validatePTFOperator((PTFOperator) op); + // PTF needs the TypeInfo of the reducer batch. + ret = validatePTFOperator((PTFOperator) op, reducerBatchTypeInfos); break; default: setOperatorNotSupported(op); @@ -2276,7 +2297,7 @@ private boolean containsLeadLag(List exprNodeDescList) { return false; } - private boolean validatePTFOperator(PTFOperator op) { + private boolean validatePTFOperator(PTFOperator op, TypeInfo[] reducerBatchTypeInfos) { if (!isPtfVectorizationEnabled) { setNodeIssue("Vectorization of PTF is not enabled (" + @@ -2312,7 +2333,8 @@ private boolean validatePTFOperator(PTFOperator op) { VectorPTFDesc vectorPTFDesc = null; try { - vectorPTFDesc = createVectorPTFDesc(op, ptfDesc); + vectorPTFDesc = createVectorPTFDesc( + op, ptfDesc, reducerBatchTypeInfos, vectorizedPTFMaxMemoryBufferingBatchCount); } catch (HiveException e) { setOperatorIssue("exception: " + VectorizationContext.getStackTraceAsSingleLine(e)); return false; @@ -3824,7 +3846,8 @@ private static void fillInPTFEvaluators( * VectorizationContext to lookup column names, etc. */ private static VectorPTFDesc createVectorPTFDesc(Operator ptfOp, - PTFDesc ptfDesc) throws HiveException { + PTFDesc ptfDesc, TypeInfo[] reducerBatchTypeInfos, + int vectorizedPTFMaxMemoryBufferingBatchCount) throws HiveException { PartitionedTableFunctionDef funcDef = ptfDesc.getFuncDef(); @@ -3838,6 +3861,8 @@ private static VectorPTFDesc createVectorPTFDesc(Operator keyInputColumns, ArrayList nonKeyInputColumns) { - final int outputSize = outputColumnMap.length; + final int outputSize = outputColumnProjectionMap.length; final int orderKeyCount = orderColumnMap.length; final int partitionKeyCount = (isPartitionOrderBy ? partitionColumnMap.length : 0); for (int i = evaluatorCount; i < outputSize; i++) { - final int nonEvalColumnNum = outputColumnMap[i]; + final int nonEvalColumnNum = outputColumnProjectionMap[i]; boolean isKey = false; for (int o = 0; o < orderKeyCount; o++) { if (nonEvalColumnNum == orderColumnMap[o]) { @@ -3946,7 +3978,8 @@ private static void determineKeyAndNonKeyInputColumnMap(int[] outputColumnMap, * execution. 
*/ private static VectorPTFInfo createVectorPTFInfo(Operator ptfOp, - PTFDesc ptfDesc, VectorizationContext vContext) throws HiveException { + PTFDesc ptfDesc, VectorizationContext vContext) + throws HiveException { PartitionedTableFunctionDef funcDef = ptfDesc.getFuncDef(); @@ -3967,17 +4000,21 @@ private static VectorPTFInfo createVectorPTFInfo(Operator keyInputColumns = new ArrayList(); ArrayList nonKeyInputColumns = new ArrayList(); - determineKeyAndNonKeyInputColumnMap(outputColumnMap, isPartitionOrderBy, orderColumnMap, + determineKeyAndNonKeyInputColumnMap(outputColumnProjectionMap, isPartitionOrderBy, orderColumnMap, partitionColumnMap, evaluatorCount, keyInputColumns, nonKeyInputColumns); int[] keyInputColumnMap = ArrayUtils.toPrimitive(keyInputColumns.toArray(new Integer[0])); int[] nonKeyInputColumnMap = ArrayUtils.toPrimitive(nonKeyInputColumns.toArray(new Integer[0])); @@ -4064,7 +4101,7 @@ private static VectorPTFInfo createVectorPTFInfo(Operator streamingColumns = new ArrayList(); + ArrayList streamingEvaluatorNums = new ArrayList(); for (int i = 0; i < evaluatorCount; i++) { final VectorPTFEvaluatorBase evaluator = evaluators[i]; if (evaluator.streamsResult()) { - streamingColumns.add(evaluator.getOutputColumnNum()); + streamingEvaluatorNums.add(i); } } - return ArrayUtils.toPrimitive(streamingColumns.toArray(new Integer[0])); + return ArrayUtils.toPrimitive(streamingEvaluatorNums.toArray(new Integer[0])); + } + + public TypeInfo[] getReducerBatchTypeInfos() { + return reducerBatchTypeInfos; + } + + public void setReducerBatchTypeInfos(TypeInfo[] reducerBatchTypeInfos) { + this.reducerBatchTypeInfos = reducerBatchTypeInfos; } public boolean getIsPartitionOrderBy() { @@ -357,4 +372,14 @@ public void setVectorPTFInfo(VectorPTFInfo vectorPTFInfo) { public VectorPTFInfo getVectorPTFInfo() { return vectorPTFInfo; } + + public void setVectorizedPTFMaxMemoryBufferingBatchCount( + int vectorizedPTFMaxMemoryBufferingBatchCount) { + this.vectorizedPTFMaxMemoryBufferingBatchCount = vectorizedPTFMaxMemoryBufferingBatchCount; + } + + public int getVectorizedPTFMaxMemoryBufferingBatchCount() { + return vectorizedPTFMaxMemoryBufferingBatchCount; + } + } diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowBytesContainer.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowBytesContainer.java new file mode 100644 index 0000000..d9e2dbd --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowBytesContainer.java @@ -0,0 +1,76 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import java.util.Random; + +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.RandomByteArrayStream; +import org.apache.hadoop.hive.ql.exec.vector.rowbytescontainer.VectorRowBytesContainer; +import org.apache.hadoop.hive.serde2.ByteStream.Output; +import org.junit.Test; + +import static org.junit.Assert.*; + +public class TestVectorRowBytesContainer { + + public void doFillReplay(Random random, int maxCount) throws Exception { + + RandomByteArrayStream randomByteArrayStream = new RandomByteArrayStream(random); + VectorRowBytesContainer vectorMapJoinRowBytesContainer = + new VectorRowBytesContainer(null); + + int count = Math.min(maxCount, random.nextInt(500)); + for (int i = 0; i < count; i++) { + byte[] bytes = randomByteArrayStream.next(); + Output output = vectorMapJoinRowBytesContainer.getOuputForRowBytes(); + output.write(bytes); + vectorMapJoinRowBytesContainer.finishRow(); + } + vectorMapJoinRowBytesContainer.prepareForReading(); + + for (int i = 0; i < count; i++) { + if (!vectorMapJoinRowBytesContainer.readNext()) { + assertTrue(false); + } + byte[] readBytes = vectorMapJoinRowBytesContainer.currentBytes(); + int readOffset = vectorMapJoinRowBytesContainer.currentOffset(); + int readLength = vectorMapJoinRowBytesContainer.currentLength(); + byte[] expectedBytes = randomByteArrayStream.get(i); + if (readLength != expectedBytes.length) { + assertTrue(false); + } + for (int j = 0; j < readLength; j++) { + byte readByte = readBytes[readOffset + j]; + byte expectedByte = expectedBytes[j]; + if (readByte != expectedByte) { + assertTrue(false); + } + } + } + } + + @Test + public void testFillReplay() throws Exception { + Random random = new Random(47496); + + for (int i = 0; i < 10; i++) { + doFillReplay(random, 1 << i); + } + } +} \ No newline at end of file diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinRowBytesContainer.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinRowBytesContainer.java deleted file mode 100644 index afe4e70..0000000 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinRowBytesContainer.java +++ /dev/null @@ -1,75 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; - -import java.util.Random; - -import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinRowBytesContainer; -import org.apache.hadoop.hive.serde2.ByteStream.Output; -import org.junit.Test; - -import static org.junit.Assert.*; - -public class TestVectorMapJoinRowBytesContainer { - - public void doFillReplay(Random random, int maxCount) throws Exception { - - RandomByteArrayStream randomByteArrayStream = new RandomByteArrayStream(random); - VectorMapJoinRowBytesContainer vectorMapJoinRowBytesContainer = - new VectorMapJoinRowBytesContainer(null); - - int count = Math.min(maxCount, random.nextInt(500)); - for (int i = 0; i < count; i++) { - byte[] bytes = randomByteArrayStream.next(); - Output output = vectorMapJoinRowBytesContainer.getOuputForRowBytes(); - output.write(bytes); - vectorMapJoinRowBytesContainer.finishRow(); - } - vectorMapJoinRowBytesContainer.prepareForReading(); - - for (int i = 0; i < count; i++) { - if (!vectorMapJoinRowBytesContainer.readNext()) { - assertTrue(false); - } - byte[] readBytes = vectorMapJoinRowBytesContainer.currentBytes(); - int readOffset = vectorMapJoinRowBytesContainer.currentOffset(); - int readLength = vectorMapJoinRowBytesContainer.currentLength(); - byte[] expectedBytes = randomByteArrayStream.get(i); - if (readLength != expectedBytes.length) { - assertTrue(false); - } - for (int j = 0; j < readLength; j++) { - byte readByte = readBytes[readOffset + j]; - byte expectedByte = expectedBytes[j]; - if (readByte != expectedByte) { - assertTrue(false); - } - } - } - } - - @Test - public void testFillReplay() throws Exception { - Random random = new Random(47496); - - for (int i = 0; i < 10; i++) { - doFillReplay(random, 1 << i); - } - } -} \ No newline at end of file diff --git ql/src/test/queries/clientpositive/vector_ptf_part_simple.q ql/src/test/queries/clientpositive/vector_ptf_part_simple.q index bd3b3e4..fc9f9eb 100644 --- ql/src/test/queries/clientpositive/vector_ptf_part_simple.q +++ ql/src/test/queries/clientpositive/vector_ptf_part_simple.q @@ -502,3 +502,50 @@ from vector_ptf_part_simple_orc; select p_mfgr, p_name, p_retailprice, rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end order by p_name) as r from vector_ptf_part_simple_orc; + + +-- +-- Run some tests with these parameters that force spilling to disk. 
+-- +set hive.vectorized.ptf.max.memory.buffering.batch.count=1; +set hive.vectorized.testing.reducer.batch.size=2; + +select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr) as rn, +rank() over(partition by p_mfgr) as r, +dense_rank() over(partition by p_mfgr) as dr, +first_value(p_retailprice) over(partition by p_mfgr) as fv, +last_value(p_retailprice) over(partition by p_mfgr) as lv, +count(p_retailprice) over(partition by p_mfgr) as c, +count(*) over(partition by p_mfgr) as cs +from vector_ptf_part_simple_orc; + +select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr order by p_name) as rn, +rank() over(partition by p_mfgr order by p_name) as r, +dense_rank() over(partition by p_mfgr order by p_name) as dr, +first_value(p_retailprice) over(partition by p_mfgr order by p_name) as fv, +last_value(p_retailprice) over(partition by p_mfgr order by p_name) as lv, +count(p_retailprice) over(partition by p_mfgr order by p_name) as c, +count(*) over(partition by p_mfgr order by p_name) as cs +from vector_ptf_part_simple_orc; + + +select p_mfgr, p_retailprice, +rank() over(partition by p_mfgr) as r +from vector_ptf_part_simple_orc; + +select p_mfgr, p_retailprice, +rank() over(partition by p_mfgr order by p_name) as r +from vector_ptf_part_simple_orc; + + +select p_mfgr, p_name, p_retailprice, +rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end order by p_name) as r +from vector_ptf_part_simple_orc; + + +select p_mfgr, p_name, p_retailprice, +rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end) as r +from vector_ptf_part_simple_orc; + diff --git ql/src/test/queries/clientpositive/vector_windowing_expressions.q ql/src/test/queries/clientpositive/vector_windowing_expressions.q index 7d8c5d5..6a37c4e 100644 --- ql/src/test/queries/clientpositive/vector_windowing_expressions.q +++ ql/src/test/queries/clientpositive/vector_windowing_expressions.q @@ -92,3 +92,38 @@ round(sum(p_retailprice) over w1 , 2) + 50.0 = round(sum(lag(p_retailprice,1,50. from part window w1 as (distribute by p_mfgr sort by p_retailprice) limit 11; + + +-- +-- Run some tests with these parameters that force spilling to disk. 
+-- +set hive.vectorized.ptf.max.memory.buffering.batch.count=1; +set hive.vectorized.testing.reducer.batch.size=2; + +select p_mfgr, p_retailprice, p_size, +round(sum(p_retailprice) over w1 , 2) = round(sum(lag(p_retailprice,1,0.0)) over w1 + last_value(p_retailprice) over w1 , 2), +max(p_retailprice) over w1 - min(p_retailprice) over w1 = last_value(p_retailprice) over w1 - first_value(p_retailprice) over w1 +from part +window w1 as (distribute by p_mfgr sort by p_retailprice) +; + +select p_mfgr, p_retailprice, p_size, +rank() over (distribute by p_mfgr sort by p_retailprice) as r, +sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) as s2, +sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) -5 as s1 +from part +; + +select p_mfgr, avg(p_retailprice) over(partition by p_mfgr, p_type order by p_mfgr) from part; + +select p_mfgr, avg(p_retailprice) over(partition by p_mfgr order by p_type,p_mfgr rows between unbounded preceding and current row) from part; + +from (select sum(i) over (partition by ts order by i), s from over10k) tt insert overwrite table t1 select * insert overwrite table t2 select * ; +select * from t1 limit 3; +select * from t2 limit 3; + +select p_mfgr, p_retailprice, p_size, +round(sum(p_retailprice) over w1 , 2) + 50.0 = round(sum(lag(p_retailprice,1,50.0)) over w1 + (last_value(p_retailprice) over w1),2) +from part +window w1 as (distribute by p_mfgr sort by p_retailprice) +limit 11; diff --git ql/src/test/results/clientpositive/llap/vector_empty_where.q.out ql/src/test/results/clientpositive/llap/vector_empty_where.q.out index b250332..e8de240 100644 --- ql/src/test/results/clientpositive/llap/vector_empty_where.q.out +++ ql/src/test/results/clientpositive/llap/vector_empty_where.q.out @@ -56,7 +56,7 @@ STAGE PLANS: keys: cint (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 2888 Data size: 8628 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3016 Data size: 9008 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -65,7 +65,7 @@ STAGE PLANS: className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 2888 Data size: 8628 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3016 Data size: 9008 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -98,7 +98,7 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 2888 Data size: 8628 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3016 Data size: 9008 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col0) Group By Vectorization: @@ -219,7 +219,7 @@ STAGE PLANS: keys: cint (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 2888 Data size: 8628 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3016 Data size: 9008 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -228,7 +228,7 @@ STAGE PLANS: className: 
VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 2888 Data size: 8628 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3016 Data size: 9008 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -261,7 +261,7 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 2888 Data size: 8628 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3016 Data size: 9008 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col0) Group By Vectorization: @@ -390,7 +390,7 @@ STAGE PLANS: keys: cint (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 2888 Data size: 8628 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3016 Data size: 9008 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -399,7 +399,7 @@ STAGE PLANS: className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 2888 Data size: 8628 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3016 Data size: 9008 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -432,7 +432,7 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 2888 Data size: 8628 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3016 Data size: 9008 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col0) Group By Vectorization: @@ -561,7 +561,7 @@ STAGE PLANS: keys: cint (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 2888 Data size: 8628 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3016 Data size: 9008 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -570,7 +570,7 @@ STAGE PLANS: className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 2888 Data size: 8628 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3016 Data size: 9008 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -603,7 +603,7 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 2888 Data size: 8628 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3016 Data size: 9008 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col0) Group By Vectorization: diff --git 
ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out index 805d5a2..01de01b 100644 --- ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out +++ ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out @@ -3093,9 +3093,9 @@ Manufacturer#1 almond aquamarine pink moccasin thistle NULL 17276.33 1173.15 175 Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 17276.33 1173.15 1753.76 1570.5754545454547 Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 17276.33 1173.15 1753.76 1570.5754545454547 Manufacturer#2 almond antique violet chocolate turquoise 1690.68 1690.68 1690.68 1690.68 1690.68 -Manufacturer#2 almond antique violet turquoise frosted 1800.7 7092.780000000001 1690.68 1800.7 1773.1950000000002 -Manufacturer#2 almond antique violet turquoise frosted 1800.7 7092.780000000001 1690.68 1800.7 1773.1950000000002 -Manufacturer#2 almond antique violet turquoise frosted 1800.7 7092.780000000001 1690.68 1800.7 1773.1950000000002 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 7092.78 1690.68 1800.7 1773.195 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 7092.78 1690.68 1800.7 1773.195 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 7092.78 1690.68 1800.7 1773.195 Manufacturer#2 almond aquamarine midnight light salmon 2031.98 9124.76 1690.68 2031.98 1824.952 Manufacturer#2 almond aquamarine rose maroon antique 900.66 11724.08 900.66 2031.98 1674.8685714285714 Manufacturer#2 almond aquamarine rose maroon antique 1698.66 11724.08 900.66 2031.98 1674.8685714285714 @@ -3322,9 +3322,9 @@ Manufacturer#1 almond aquamarine pink moccasin thistle NULL 17276.33 1173.15 175 Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 17276.33 1173.15 1753.76 1570.5754545454547 Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 17276.33 1173.15 1753.76 1570.5754545454547 Manufacturer#2 almond antique violet chocolate turquoise 1690.68 1690.68 1690.68 1690.68 1690.68 -Manufacturer#2 almond antique violet turquoise frosted 1800.7 7092.780000000001 1690.68 1800.7 1773.1950000000002 -Manufacturer#2 almond antique violet turquoise frosted 1800.7 7092.780000000001 1690.68 1800.7 1773.1950000000002 -Manufacturer#2 almond antique violet turquoise frosted 1800.7 7092.780000000001 1690.68 1800.7 1773.1950000000002 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 7092.78 1690.68 1800.7 1773.195 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 7092.78 1690.68 1800.7 1773.195 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 7092.78 1690.68 1800.7 1773.195 Manufacturer#2 almond aquamarine midnight light salmon 2031.98 9124.76 1690.68 2031.98 1824.952 Manufacturer#2 almond aquamarine rose maroon antique 900.66 11724.08 900.66 2031.98 1674.8685714285714 Manufacturer#2 almond aquamarine rose maroon antique 1698.66 11724.08 900.66 2031.98 1674.8685714285714 @@ -3740,15 +3740,15 @@ Manufacturer#5 almond antique blue firebrick mint 1789.69 1789.69 1789.69 1789.6 Manufacturer#1 almond antique burnished rose metallic 1173.15 4135.99 1173.15 1789.69 1378.6633333333332 Manufacturer#1 almond antique burnished rose metallic 1173.15 4135.99 1173.15 1789.69 1378.6633333333332 Manufacturer#3 almond antique chartreuse khaki white 99.68 4235.67 99.68 1789.69 1058.9175 -Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 11250.71 99.68 1789.69 1406.33875 -Manufacturer#1 almond antique 
chartreuse lavender yellow 1753.76 11250.71 99.68 1789.69 1406.33875 -Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 11250.71 99.68 1789.69 1406.33875 -Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 11250.71 99.68 1789.69 1406.33875 -Manufacturer#3 almond antique forest lavender goldenrod NULL 14221.519999999999 99.68 1789.69 1292.8654545454544 -Manufacturer#3 almond antique forest lavender goldenrod 590.27 14221.519999999999 99.68 1789.69 1292.8654545454544 -Manufacturer#3 almond antique forest lavender goldenrod 1190.27 14221.519999999999 99.68 1789.69 1292.8654545454544 -Manufacturer#3 almond antique forest lavender goldenrod 1190.27 14221.519999999999 99.68 1789.69 1292.8654545454544 -Manufacturer#4 almond antique gainsboro frosted violet NULL 14221.519999999999 99.68 1789.69 1292.8654545454544 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 11250.710000000001 99.68 1789.69 1406.3387500000001 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 11250.710000000001 99.68 1789.69 1406.3387500000001 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 11250.710000000001 99.68 1789.69 1406.3387500000001 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 11250.710000000001 99.68 1789.69 1406.3387500000001 +Manufacturer#3 almond antique forest lavender goldenrod NULL 14221.52 99.68 1789.69 1292.8654545454547 +Manufacturer#3 almond antique forest lavender goldenrod 590.27 14221.52 99.68 1789.69 1292.8654545454547 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 14221.52 99.68 1789.69 1292.8654545454547 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 14221.52 99.68 1789.69 1292.8654545454547 +Manufacturer#4 almond antique gainsboro frosted violet NULL 14221.52 99.68 1789.69 1292.8654545454547 Manufacturer#5 almond antique medium spring khaki 1611.66 17444.84 99.68 1789.69 1341.9107692307693 Manufacturer#5 almond antique medium spring khaki 1611.66 17444.84 99.68 1789.69 1341.9107692307693 Manufacturer#3 almond antique metallic orange dim 55.39 17500.23 55.39 1789.69 1250.0164285714286 @@ -3758,24 +3758,24 @@ Manufacturer#1 almond antique salmon chartreuse burlywood 1602.59 22363.09 55.39 Manufacturer#5 almond antique sky peru orange 1788.73 24151.82 55.39 1922.98 1341.7677777777778 Manufacturer#2 almond antique violet chocolate turquoise 1690.68 25842.5 55.39 1922.98 1360.1315789473683 Manufacturer#4 almond antique violet mint lemon 1375.42 27217.92 55.39 1922.98 1360.896 -Manufacturer#2 almond antique violet turquoise frosted 1800.7 32620.019999999997 55.39 1922.98 1418.2617391304345 -Manufacturer#2 almond antique violet turquoise frosted 1800.7 32620.019999999997 55.39 1922.98 1418.2617391304345 -Manufacturer#2 almond antique violet turquoise frosted 1800.7 32620.019999999997 55.39 1922.98 1418.2617391304345 -Manufacturer#1 almond aquamarine burnished black steel 1414.42 34034.439999999995 55.39 1922.98 1418.1016666666665 -Manufacturer#5 almond aquamarine dodger light gainsboro 1018.1 35052.53999999999 55.39 1922.98 1402.1015999999997 -Manufacturer#4 almond aquamarine floral ivory bisque 1206.26 36258.799999999996 55.39 1922.98 1394.5692307692307 -Manufacturer#4 almond aquamarine floral ivory bisque NULL 36258.799999999996 55.39 1922.98 1394.5692307692307 -Manufacturer#2 almond aquamarine midnight light salmon 2031.98 38290.78 55.39 2031.98 1418.177037037037 -Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 43188.76 55.39 2031.98 1439.6253333333334 
-Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 43188.76 55.39 2031.98 1439.6253333333334 -Manufacturer#1 almond aquamarine pink moccasin thistle NULL 43188.76 55.39 2031.98 1439.6253333333334 -Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 43188.76 55.39 2031.98 1439.6253333333334 -Manufacturer#2 almond aquamarine rose maroon antique 900.66 45788.08 55.39 2031.98 1430.8775 -Manufacturer#2 almond aquamarine rose maroon antique 1698.66 45788.08 55.39 2031.98 1430.8775 -Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.6 46788.68 55.39 2031.98 1417.8387878787878 -Manufacturer#4 almond aquamarine yellow dodger mint 1844.92 48633.6 55.39 2031.98 1430.3999999999999 -Manufacturer#4 almond azure aquamarine papaya violet 1290.35 49923.95 55.39 2031.98 1426.3985714285714 -Manufacturer#5 almond azure blanched chiffon midnight 1464.48 51388.43 55.39 2031.98 1427.4563888888888 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 32620.02 55.39 1922.98 1418.2617391304348 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 32620.02 55.39 1922.98 1418.2617391304348 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 32620.02 55.39 1922.98 1418.2617391304348 +Manufacturer#1 almond aquamarine burnished black steel 1414.42 34034.44 55.39 1922.98 1418.1016666666667 +Manufacturer#5 almond aquamarine dodger light gainsboro 1018.1 35052.54 55.39 1922.98 1402.1016 +Manufacturer#4 almond aquamarine floral ivory bisque 1206.26 36258.8 55.39 1922.98 1394.569230769231 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 36258.8 55.39 1922.98 1394.569230769231 +Manufacturer#2 almond aquamarine midnight light salmon 2031.98 38290.780000000006 55.39 2031.98 1418.1770370370373 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 43188.76000000001 55.39 2031.98 1439.6253333333336 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 43188.76000000001 55.39 2031.98 1439.6253333333336 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 43188.76000000001 55.39 2031.98 1439.6253333333336 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 43188.76000000001 55.39 2031.98 1439.6253333333336 +Manufacturer#2 almond aquamarine rose maroon antique 900.66 45788.08000000001 55.39 2031.98 1430.8775000000003 +Manufacturer#2 almond aquamarine rose maroon antique 1698.66 45788.08000000001 55.39 2031.98 1430.8775000000003 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.6 46788.68000000001 55.39 2031.98 1417.838787878788 +Manufacturer#4 almond aquamarine yellow dodger mint 1844.92 48633.600000000006 55.39 2031.98 1430.4 +Manufacturer#4 almond azure aquamarine papaya violet 1290.35 49923.950000000004 55.39 2031.98 1426.3985714285716 +Manufacturer#5 almond azure blanched chiffon midnight 1464.48 51388.43000000001 55.39 2031.98 1427.456388888889 PREHOOK: query: explain vectorization detail select p_mfgr,p_name, p_retailprice, sum(p_retailprice) over(order by p_name range between unbounded preceding and current row) as s, @@ -3971,15 +3971,15 @@ Manufacturer#5 almond antique blue firebrick mint 1789.69 1789.69 1789.69 1789.6 Manufacturer#1 almond antique burnished rose metallic 1173.15 4135.99 1173.15 1789.69 1378.6633333333332 Manufacturer#1 almond antique burnished rose metallic 1173.15 4135.99 1173.15 1789.69 1378.6633333333332 Manufacturer#3 almond antique chartreuse khaki white 99.68 4235.67 99.68 1789.69 1058.9175 -Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 11250.71 99.68 1789.69 
1406.33875 -Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 11250.71 99.68 1789.69 1406.33875 -Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 11250.71 99.68 1789.69 1406.33875 -Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 11250.71 99.68 1789.69 1406.33875 -Manufacturer#3 almond antique forest lavender goldenrod NULL 14221.519999999999 99.68 1789.69 1292.8654545454544 -Manufacturer#3 almond antique forest lavender goldenrod 590.27 14221.519999999999 99.68 1789.69 1292.8654545454544 -Manufacturer#3 almond antique forest lavender goldenrod 1190.27 14221.519999999999 99.68 1789.69 1292.8654545454544 -Manufacturer#3 almond antique forest lavender goldenrod 1190.27 14221.519999999999 99.68 1789.69 1292.8654545454544 -Manufacturer#4 almond antique gainsboro frosted violet NULL 14221.519999999999 99.68 1789.69 1292.8654545454544 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 11250.710000000001 99.68 1789.69 1406.3387500000001 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 11250.710000000001 99.68 1789.69 1406.3387500000001 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 11250.710000000001 99.68 1789.69 1406.3387500000001 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 11250.710000000001 99.68 1789.69 1406.3387500000001 +Manufacturer#3 almond antique forest lavender goldenrod NULL 14221.52 99.68 1789.69 1292.8654545454547 +Manufacturer#3 almond antique forest lavender goldenrod 590.27 14221.52 99.68 1789.69 1292.8654545454547 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 14221.52 99.68 1789.69 1292.8654545454547 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 14221.52 99.68 1789.69 1292.8654545454547 +Manufacturer#4 almond antique gainsboro frosted violet NULL 14221.52 99.68 1789.69 1292.8654545454547 Manufacturer#5 almond antique medium spring khaki 1611.66 17444.84 99.68 1789.69 1341.9107692307693 Manufacturer#5 almond antique medium spring khaki 1611.66 17444.84 99.68 1789.69 1341.9107692307693 Manufacturer#3 almond antique metallic orange dim 55.39 17500.23 55.39 1789.69 1250.0164285714286 @@ -3989,24 +3989,24 @@ Manufacturer#1 almond antique salmon chartreuse burlywood 1602.59 22363.09 55.39 Manufacturer#5 almond antique sky peru orange 1788.73 24151.82 55.39 1922.98 1341.7677777777778 Manufacturer#2 almond antique violet chocolate turquoise 1690.68 25842.5 55.39 1922.98 1360.1315789473683 Manufacturer#4 almond antique violet mint lemon 1375.42 27217.92 55.39 1922.98 1360.896 -Manufacturer#2 almond antique violet turquoise frosted 1800.7 32620.019999999997 55.39 1922.98 1418.2617391304345 -Manufacturer#2 almond antique violet turquoise frosted 1800.7 32620.019999999997 55.39 1922.98 1418.2617391304345 -Manufacturer#2 almond antique violet turquoise frosted 1800.7 32620.019999999997 55.39 1922.98 1418.2617391304345 -Manufacturer#1 almond aquamarine burnished black steel 1414.42 34034.439999999995 55.39 1922.98 1418.1016666666665 -Manufacturer#5 almond aquamarine dodger light gainsboro 1018.1 35052.53999999999 55.39 1922.98 1402.1015999999997 -Manufacturer#4 almond aquamarine floral ivory bisque 1206.26 36258.799999999996 55.39 1922.98 1394.5692307692307 -Manufacturer#4 almond aquamarine floral ivory bisque NULL 36258.799999999996 55.39 1922.98 1394.5692307692307 -Manufacturer#2 almond aquamarine midnight light salmon 2031.98 38290.78 55.39 2031.98 1418.177037037037 -Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 
43188.76 55.39 2031.98 1439.6253333333334 -Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 43188.76 55.39 2031.98 1439.6253333333334 -Manufacturer#1 almond aquamarine pink moccasin thistle NULL 43188.76 55.39 2031.98 1439.6253333333334 -Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 43188.76 55.39 2031.98 1439.6253333333334 -Manufacturer#2 almond aquamarine rose maroon antique 900.66 45788.08 55.39 2031.98 1430.8775 -Manufacturer#2 almond aquamarine rose maroon antique 1698.66 45788.08 55.39 2031.98 1430.8775 -Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.6 46788.68 55.39 2031.98 1417.8387878787878 -Manufacturer#4 almond aquamarine yellow dodger mint 1844.92 48633.6 55.39 2031.98 1430.3999999999999 -Manufacturer#4 almond azure aquamarine papaya violet 1290.35 49923.95 55.39 2031.98 1426.3985714285714 -Manufacturer#5 almond azure blanched chiffon midnight 1464.48 51388.43 55.39 2031.98 1427.4563888888888 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 32620.02 55.39 1922.98 1418.2617391304348 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 32620.02 55.39 1922.98 1418.2617391304348 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 32620.02 55.39 1922.98 1418.2617391304348 +Manufacturer#1 almond aquamarine burnished black steel 1414.42 34034.44 55.39 1922.98 1418.1016666666667 +Manufacturer#5 almond aquamarine dodger light gainsboro 1018.1 35052.54 55.39 1922.98 1402.1016 +Manufacturer#4 almond aquamarine floral ivory bisque 1206.26 36258.8 55.39 1922.98 1394.569230769231 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 36258.8 55.39 1922.98 1394.569230769231 +Manufacturer#2 almond aquamarine midnight light salmon 2031.98 38290.780000000006 55.39 2031.98 1418.1770370370373 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 43188.76000000001 55.39 2031.98 1439.6253333333336 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 43188.76000000001 55.39 2031.98 1439.6253333333336 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 43188.76000000001 55.39 2031.98 1439.6253333333336 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 43188.76000000001 55.39 2031.98 1439.6253333333336 +Manufacturer#2 almond aquamarine rose maroon antique 900.66 45788.08000000001 55.39 2031.98 1430.8775000000003 +Manufacturer#2 almond aquamarine rose maroon antique 1698.66 45788.08000000001 55.39 2031.98 1430.8775000000003 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.6 46788.68000000001 55.39 2031.98 1417.838787878788 +Manufacturer#4 almond aquamarine yellow dodger mint 1844.92 48633.600000000006 55.39 2031.98 1430.4 +Manufacturer#4 almond azure aquamarine papaya violet 1290.35 49923.950000000004 55.39 2031.98 1426.3985714285716 +Manufacturer#5 almond azure blanched chiffon midnight 1464.48 51388.43000000001 55.39 2031.98 1427.456388888889 PREHOOK: query: explain vectorization detail select p_mfgr,p_name, p_retailprice, sum(p_retailprice) over(order by p_name rows between unbounded preceding and current row) as s, @@ -5947,3 +5947,345 @@ Manufacturer#5 almond antique medium spring khaki 1611.66 2 Manufacturer#5 almond antique sky peru orange 1788.73 4 Manufacturer#5 almond aquamarine dodger light gainsboro 1018.1 5 Manufacturer#5 almond azure blanched chiffon midnight 1464.48 6 +PREHOOK: query: select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr) as rn, +rank() over(partition by p_mfgr) as r, +dense_rank() over(partition by p_mfgr) as dr, 
+first_value(p_retailprice) over(partition by p_mfgr) as fv, +last_value(p_retailprice) over(partition by p_mfgr) as lv, +count(p_retailprice) over(partition by p_mfgr) as c, +count(*) over(partition by p_mfgr) as cs +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr) as rn, +rank() over(partition by p_mfgr) as r, +dense_rank() over(partition by p_mfgr) as dr, +first_value(p_retailprice) over(partition by p_mfgr) as fv, +last_value(p_retailprice) over(partition by p_mfgr) as lv, +count(p_retailprice) over(partition by p_mfgr) as c, +count(*) over(partition by p_mfgr) as cs +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +p_mfgr p_name p_retailprice rn r dr fv lv c cs +Manufacturer#4 almond azure aquamarine papaya violet 1290.35 1 1 1 1290.35 1206.26 4 6 +Manufacturer#4 almond antique violet mint lemon 1375.42 2 1 1 1290.35 1206.26 4 6 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 3 1 1 1290.35 1206.26 4 6 +Manufacturer#4 almond antique gainsboro frosted violet NULL 4 1 1 1290.35 1206.26 4 6 +Manufacturer#4 almond aquamarine yellow dodger mint 1844.92 5 1 1 1290.35 1206.26 4 6 +Manufacturer#4 almond aquamarine floral ivory bisque 1206.26 6 1 1 1290.35 1206.26 4 6 +Manufacturer#5 almond azure blanched chiffon midnight 1464.48 1 1 1 1464.48 1788.73 6 6 +Manufacturer#5 almond aquamarine dodger light gainsboro 1018.1 2 1 1 1464.48 1788.73 6 6 +Manufacturer#5 almond antique medium spring khaki 1611.66 3 1 1 1464.48 1788.73 6 6 +Manufacturer#5 almond antique blue firebrick mint 1789.69 4 1 1 1464.48 1788.73 6 6 +Manufacturer#5 almond antique medium spring khaki 1611.66 5 1 1 1464.48 1788.73 6 6 +Manufacturer#5 almond antique sky peru orange 1788.73 6 1 1 1464.48 1788.73 6 6 +Manufacturer#2 almond aquamarine rose maroon antique 900.66 1 1 1 900.66 1800.7 8 8 +Manufacturer#2 almond aquamarine rose maroon antique 1698.66 2 1 1 900.66 1800.7 8 8 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 3 1 1 900.66 1800.7 8 8 +Manufacturer#2 almond antique violet chocolate turquoise 1690.68 4 1 1 900.66 1800.7 8 8 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 5 1 1 900.66 1800.7 8 8 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.6 6 1 1 900.66 1800.7 8 8 +Manufacturer#2 almond aquamarine midnight light salmon 2031.98 7 1 1 900.66 1800.7 8 8 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 8 1 1 900.66 1800.7 8 8 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 1 1 1 1753.76 1632.66 11 12 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 2 1 1 1753.76 1632.66 11 12 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 3 1 1 1753.76 1632.66 11 12 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 4 1 1 1753.76 1632.66 11 12 +Manufacturer#1 almond aquamarine burnished black steel 1414.42 5 1 1 1753.76 1632.66 11 12 +Manufacturer#1 almond antique burnished rose metallic 1173.15 6 1 1 1753.76 1632.66 11 12 +Manufacturer#1 almond antique salmon chartreuse burlywood 1602.59 7 1 1 1753.76 1632.66 11 12 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 8 1 1 1753.76 1632.66 11 12 +Manufacturer#1 almond antique burnished rose metallic 1173.15 9 1 1 1753.76 1632.66 11 12 +Manufacturer#1 almond 
antique chartreuse lavender yellow 1753.76 10 1 1 1753.76 1632.66 11 12 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 11 1 1 1753.76 1632.66 11 12 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 12 1 1 1753.76 1632.66 11 12 +Manufacturer#3 almond antique forest lavender goldenrod 590.27 1 1 1 590.27 99.68 7 8 +Manufacturer#3 almond antique metallic orange dim 55.39 2 1 1 590.27 99.68 7 8 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 3 1 1 590.27 99.68 7 8 +Manufacturer#3 almond antique olive coral navajo 1337.29 4 1 1 590.27 99.68 7 8 +Manufacturer#3 almond antique misty red olive 1922.98 5 1 1 590.27 99.68 7 8 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 6 1 1 590.27 99.68 7 8 +Manufacturer#3 almond antique forest lavender goldenrod NULL 7 1 1 590.27 99.68 7 8 +Manufacturer#3 almond antique chartreuse khaki white 99.68 8 1 1 590.27 99.68 7 8 +PREHOOK: query: select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr order by p_name) as rn, +rank() over(partition by p_mfgr order by p_name) as r, +dense_rank() over(partition by p_mfgr order by p_name) as dr, +first_value(p_retailprice) over(partition by p_mfgr order by p_name) as fv, +last_value(p_retailprice) over(partition by p_mfgr order by p_name) as lv, +count(p_retailprice) over(partition by p_mfgr order by p_name) as c, +count(*) over(partition by p_mfgr order by p_name) as cs +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr order by p_name) as rn, +rank() over(partition by p_mfgr order by p_name) as r, +dense_rank() over(partition by p_mfgr order by p_name) as dr, +first_value(p_retailprice) over(partition by p_mfgr order by p_name) as fv, +last_value(p_retailprice) over(partition by p_mfgr order by p_name) as lv, +count(p_retailprice) over(partition by p_mfgr order by p_name) as c, +count(*) over(partition by p_mfgr order by p_name) as cs +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +p_mfgr p_name p_retailprice rn r dr fv lv c cs +Manufacturer#1 almond antique burnished rose metallic 1173.15 1 1 1 1173.15 1173.15 2 2 +Manufacturer#1 almond antique burnished rose metallic 1173.15 2 1 1 1173.15 1173.15 2 2 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 3 3 2 1173.15 1753.76 6 6 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 4 3 2 1173.15 1753.76 6 6 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 5 3 2 1173.15 1753.76 6 6 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 6 3 2 1173.15 1753.76 6 6 +Manufacturer#1 almond antique salmon chartreuse burlywood 1602.59 7 7 3 1173.15 1602.59 7 7 +Manufacturer#1 almond aquamarine burnished black steel 1414.42 8 8 4 1173.15 1414.42 8 8 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 9 9 5 1173.15 1632.66 11 12 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 10 9 5 1173.15 1632.66 11 12 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 11 9 5 1173.15 1632.66 11 12 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 12 9 5 1173.15 1632.66 11 12 +Manufacturer#2 almond antique violet chocolate turquoise 1690.68 1 1 1 1690.68 1690.68 1 1 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 2 2 
2 1690.68 1800.7 4 4 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 3 2 2 1690.68 1800.7 4 4 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 4 2 2 1690.68 1800.7 4 4 +Manufacturer#2 almond aquamarine midnight light salmon 2031.98 5 5 3 1690.68 2031.98 5 5 +Manufacturer#2 almond aquamarine rose maroon antique 900.66 6 6 4 1690.68 1698.66 7 7 +Manufacturer#2 almond aquamarine rose maroon antique 1698.66 7 6 4 1690.68 1698.66 7 7 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.6 8 8 5 1690.68 1000.6 8 8 +Manufacturer#3 almond antique chartreuse khaki white 99.68 1 1 1 99.68 99.68 1 1 +Manufacturer#3 almond antique forest lavender goldenrod 590.27 2 2 2 99.68 1190.27 4 5 +Manufacturer#3 almond antique forest lavender goldenrod NULL 3 2 2 99.68 1190.27 4 5 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 4 2 2 99.68 1190.27 4 5 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 5 2 2 99.68 1190.27 4 5 +Manufacturer#3 almond antique metallic orange dim 55.39 6 6 3 99.68 55.39 5 6 +Manufacturer#3 almond antique misty red olive 1922.98 7 7 4 99.68 1922.98 6 7 +Manufacturer#3 almond antique olive coral navajo 1337.29 8 8 5 99.68 1337.29 7 8 +Manufacturer#4 almond antique gainsboro frosted violet NULL 1 1 1 NULL NULL 0 1 +Manufacturer#4 almond antique violet mint lemon 1375.42 2 2 2 NULL 1375.42 1 2 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 3 3 3 NULL 1206.26 2 4 +Manufacturer#4 almond aquamarine floral ivory bisque 1206.26 4 3 3 NULL 1206.26 2 4 +Manufacturer#4 almond aquamarine yellow dodger mint 1844.92 5 5 4 NULL 1844.92 3 5 +Manufacturer#4 almond azure aquamarine papaya violet 1290.35 6 6 5 NULL 1290.35 4 6 +Manufacturer#5 almond antique blue firebrick mint 1789.69 1 1 1 1789.69 1789.69 1 1 +Manufacturer#5 almond antique medium spring khaki 1611.66 2 2 2 1789.69 1611.66 3 3 +Manufacturer#5 almond antique medium spring khaki 1611.66 3 2 2 1789.69 1611.66 3 3 +Manufacturer#5 almond antique sky peru orange 1788.73 4 4 3 1789.69 1788.73 4 4 +Manufacturer#5 almond aquamarine dodger light gainsboro 1018.1 5 5 4 1789.69 1018.1 5 5 +Manufacturer#5 almond azure blanched chiffon midnight 1464.48 6 6 5 1789.69 1464.48 6 6 +PREHOOK: query: select p_mfgr, p_retailprice, +rank() over(partition by p_mfgr) as r +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_retailprice, +rank() over(partition by p_mfgr) as r +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +p_mfgr p_retailprice r +Manufacturer#4 1290.35 1 +Manufacturer#4 1375.42 1 +Manufacturer#4 NULL 1 +Manufacturer#4 NULL 1 +Manufacturer#4 1844.92 1 +Manufacturer#4 1206.26 1 +Manufacturer#5 1464.48 1 +Manufacturer#5 1018.1 1 +Manufacturer#5 1611.66 1 +Manufacturer#5 1789.69 1 +Manufacturer#5 1611.66 1 +Manufacturer#5 1788.73 1 +Manufacturer#2 900.66 1 +Manufacturer#2 1698.66 1 +Manufacturer#2 1800.7 1 +Manufacturer#2 1690.68 1 +Manufacturer#2 1800.7 1 +Manufacturer#2 1000.6 1 +Manufacturer#2 2031.98 1 +Manufacturer#2 1800.7 1 +Manufacturer#1 1753.76 1 +Manufacturer#1 1632.66 1 +Manufacturer#1 1632.66 1 +Manufacturer#1 1753.76 1 +Manufacturer#1 1414.42 1 +Manufacturer#1 1173.15 1 +Manufacturer#1 1602.59 1 +Manufacturer#1 1753.76 1 +Manufacturer#1 1173.15 1 +Manufacturer#1 1753.76 1 +Manufacturer#1 NULL 1 +Manufacturer#1 1632.66 1 +Manufacturer#3 
590.27 1 +Manufacturer#3 55.39 1 +Manufacturer#3 1190.27 1 +Manufacturer#3 1337.29 1 +Manufacturer#3 1922.98 1 +Manufacturer#3 1190.27 1 +Manufacturer#3 NULL 1 +Manufacturer#3 99.68 1 +PREHOOK: query: select p_mfgr, p_retailprice, +rank() over(partition by p_mfgr order by p_name) as r +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_retailprice, +rank() over(partition by p_mfgr order by p_name) as r +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +p_mfgr p_retailprice r +Manufacturer#1 1173.15 1 +Manufacturer#1 1173.15 1 +Manufacturer#1 1753.76 3 +Manufacturer#1 1753.76 3 +Manufacturer#1 1753.76 3 +Manufacturer#1 1753.76 3 +Manufacturer#1 1602.59 7 +Manufacturer#1 1414.42 8 +Manufacturer#1 1632.66 9 +Manufacturer#1 NULL 9 +Manufacturer#1 1632.66 9 +Manufacturer#1 1632.66 9 +Manufacturer#2 1690.68 1 +Manufacturer#2 1800.7 2 +Manufacturer#2 1800.7 2 +Manufacturer#2 1800.7 2 +Manufacturer#2 2031.98 5 +Manufacturer#2 900.66 6 +Manufacturer#2 1698.66 6 +Manufacturer#2 1000.6 8 +Manufacturer#3 99.68 1 +Manufacturer#3 590.27 2 +Manufacturer#3 NULL 2 +Manufacturer#3 1190.27 2 +Manufacturer#3 1190.27 2 +Manufacturer#3 55.39 6 +Manufacturer#3 1922.98 7 +Manufacturer#3 1337.29 8 +Manufacturer#4 NULL 1 +Manufacturer#4 1375.42 2 +Manufacturer#4 NULL 3 +Manufacturer#4 1206.26 3 +Manufacturer#4 1844.92 5 +Manufacturer#4 1290.35 6 +Manufacturer#5 1789.69 1 +Manufacturer#5 1611.66 2 +Manufacturer#5 1611.66 2 +Manufacturer#5 1788.73 4 +Manufacturer#5 1018.1 5 +Manufacturer#5 1464.48 6 +PREHOOK: query: select p_mfgr, p_name, p_retailprice, +rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end order by p_name) as r +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_retailprice, +rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end order by p_name) as r +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +p_mfgr p_name p_retailprice r +Manufacturer#1 almond antique burnished rose metallic 1173.15 1 +Manufacturer#1 almond antique burnished rose metallic 1173.15 1 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 3 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 3 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 3 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 3 +Manufacturer#1 almond antique salmon chartreuse burlywood 1602.59 7 +Manufacturer#1 almond aquamarine burnished black steel 1414.42 8 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 9 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 9 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 9 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 9 +Manufacturer#2 almond antique violet chocolate turquoise 1690.68 1 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 2 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 2 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 2 +Manufacturer#2 almond aquamarine midnight light salmon 2031.98 5 
+Manufacturer#2 almond aquamarine rose maroon antique 900.66 6 +Manufacturer#2 almond aquamarine rose maroon antique 1698.66 6 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.6 8 +Manufacturer#3 almond antique chartreuse khaki white 99.68 1 +Manufacturer#3 almond antique forest lavender goldenrod 590.27 2 +Manufacturer#3 almond antique forest lavender goldenrod NULL 2 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 2 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 2 +Manufacturer#3 almond antique metallic orange dim 55.39 6 +Manufacturer#3 almond antique misty red olive 1922.98 7 +Manufacturer#3 almond antique olive coral navajo 1337.29 8 +Manufacturer#4 almond antique gainsboro frosted violet NULL 1 +Manufacturer#4 almond antique violet mint lemon 1375.42 2 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 3 +Manufacturer#4 almond aquamarine floral ivory bisque 1206.26 3 +Manufacturer#4 almond aquamarine yellow dodger mint 1844.92 5 +Manufacturer#4 almond azure aquamarine papaya violet 1290.35 6 +Manufacturer#5 almond antique blue firebrick mint 1789.69 1 +Manufacturer#5 almond antique medium spring khaki 1611.66 2 +Manufacturer#5 almond antique medium spring khaki 1611.66 2 +Manufacturer#5 almond antique sky peru orange 1788.73 4 +Manufacturer#5 almond aquamarine dodger light gainsboro 1018.1 5 +Manufacturer#5 almond azure blanched chiffon midnight 1464.48 6 +PREHOOK: query: select p_mfgr, p_name, p_retailprice, +rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end) as r +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_retailprice, +rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end) as r +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +p_mfgr p_name p_retailprice r +Manufacturer#1 almond aquamarine burnished black steel 1414.42 1 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 1 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 1 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 1 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 1 +Manufacturer#1 almond antique burnished rose metallic 1173.15 1 +Manufacturer#1 almond antique salmon chartreuse burlywood 1602.59 1 +Manufacturer#1 almond antique burnished rose metallic 1173.15 1 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 1 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 1 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 1 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 1 +Manufacturer#2 almond aquamarine rose maroon antique 900.66 1 +Manufacturer#2 almond aquamarine rose maroon antique 1698.66 1 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 1 +Manufacturer#2 almond antique violet chocolate turquoise 1690.68 1 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 1 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 1 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.6 1 +Manufacturer#2 almond aquamarine midnight light salmon 2031.98 1 +Manufacturer#4 almond antique violet mint lemon 1375.42 1 +Manufacturer#4 almond antique gainsboro frosted violet NULL 1 
+Manufacturer#4 almond aquamarine floral ivory bisque NULL 1 +Manufacturer#4 almond aquamarine floral ivory bisque 1206.26 1 +Manufacturer#4 almond azure aquamarine papaya violet 1290.35 1 +Manufacturer#4 almond aquamarine yellow dodger mint 1844.92 1 +Manufacturer#3 almond antique misty red olive 1922.98 1 +Manufacturer#3 almond antique chartreuse khaki white 99.68 1 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 1 +Manufacturer#3 almond antique metallic orange dim 55.39 1 +Manufacturer#3 almond antique olive coral navajo 1337.29 1 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 1 +Manufacturer#3 almond antique forest lavender goldenrod 590.27 1 +Manufacturer#3 almond antique forest lavender goldenrod NULL 1 +Manufacturer#5 almond antique medium spring khaki 1611.66 1 +Manufacturer#5 almond aquamarine dodger light gainsboro 1018.1 1 +Manufacturer#5 almond antique sky peru orange 1788.73 1 +Manufacturer#5 almond antique medium spring khaki 1611.66 1 +Manufacturer#5 almond azure blanched chiffon midnight 1464.48 1 +Manufacturer#5 almond antique blue firebrick mint 1789.69 1 diff --git ql/src/test/results/clientpositive/llap/vector_windowing_expressions.q.out ql/src/test/results/clientpositive/llap/vector_windowing_expressions.q.out index 5ab77c8..729a754 100644 --- ql/src/test/results/clientpositive/llap/vector_windowing_expressions.q.out +++ ql/src/test/results/clientpositive/llap/vector_windowing_expressions.q.out @@ -2020,3 +2020,226 @@ Manufacturer#2 1698.66 25 true Manufacturer#2 1701.6 18 true Manufacturer#2 1800.7 40 true Manufacturer#2 2031.98 2 true +PREHOOK: query: select p_mfgr, p_retailprice, p_size, +round(sum(p_retailprice) over w1 , 2) = round(sum(lag(p_retailprice,1,0.0)) over w1 + last_value(p_retailprice) over w1 , 2), +max(p_retailprice) over w1 - min(p_retailprice) over w1 = last_value(p_retailprice) over w1 - first_value(p_retailprice) over w1 +from part +window w1 as (distribute by p_mfgr sort by p_retailprice) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_retailprice, p_size, +round(sum(p_retailprice) over w1 , 2) = round(sum(lag(p_retailprice,1,0.0)) over w1 + last_value(p_retailprice) over w1 , 2), +max(p_retailprice) over w1 - min(p_retailprice) over w1 = last_value(p_retailprice) over w1 - first_value(p_retailprice) over w1 +from part +window w1 as (distribute by p_mfgr sort by p_retailprice) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_retailprice p_size _c3 _c4 +Manufacturer#1 1173.15 2 true true +Manufacturer#1 1173.15 2 true true +Manufacturer#1 1414.42 28 true true +Manufacturer#1 1602.59 6 true true +Manufacturer#1 1632.66 42 true true +Manufacturer#1 1753.76 34 true true +Manufacturer#2 1690.68 14 true true +Manufacturer#2 1698.66 25 true true +Manufacturer#2 1701.6 18 true true +Manufacturer#2 1800.7 40 true true +Manufacturer#2 2031.98 2 true true +Manufacturer#3 1190.27 14 true true +Manufacturer#3 1337.29 45 true true +Manufacturer#3 1410.39 19 true true +Manufacturer#3 1671.68 17 true true +Manufacturer#3 1922.98 1 true true +Manufacturer#4 1206.26 27 true true +Manufacturer#4 1290.35 12 true true +Manufacturer#4 1375.42 39 true true +Manufacturer#4 1620.67 10 true true +Manufacturer#4 1844.92 7 true true +Manufacturer#5 1018.1 46 true true +Manufacturer#5 1464.48 23 true true +Manufacturer#5 1611.66 6 true true +Manufacturer#5 1788.73 2 true true +Manufacturer#5 1789.69 31 true true 
+PREHOOK: query: select p_mfgr, p_retailprice, p_size, +rank() over (distribute by p_mfgr sort by p_retailprice) as r, +sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) as s2, +sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) -5 as s1 +from part +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_retailprice, p_size, +rank() over (distribute by p_mfgr sort by p_retailprice) as r, +sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) as s2, +sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) -5 as s1 +from part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_retailprice p_size r s2 s1 +Manufacturer#1 1173.15 2 1 1173.15 1168.15 +Manufacturer#1 1173.15 2 1 2346.3 2341.3 +Manufacturer#1 1414.42 28 3 3760.7200000000003 3755.7200000000003 +Manufacturer#1 1602.59 6 4 5363.31 5358.31 +Manufacturer#1 1632.66 42 5 6995.97 6990.97 +Manufacturer#1 1753.76 34 6 8749.73 8744.73 +Manufacturer#2 1690.68 14 1 1690.68 1685.68 +Manufacturer#2 1698.66 25 2 3389.34 3384.34 +Manufacturer#2 1701.6 18 3 5090.9400000000005 5085.9400000000005 +Manufacturer#2 1800.7 40 4 6891.64 6886.64 +Manufacturer#2 2031.98 2 5 8923.62 8918.62 +Manufacturer#3 1190.27 14 1 1190.27 1185.27 +Manufacturer#3 1337.29 45 2 2527.56 2522.56 +Manufacturer#3 1410.39 19 3 3937.95 3932.95 +Manufacturer#3 1671.68 17 4 5609.63 5604.63 +Manufacturer#3 1922.98 1 5 7532.610000000001 7527.610000000001 +Manufacturer#4 1206.26 27 1 1206.26 1201.26 +Manufacturer#4 1290.35 12 2 2496.6099999999997 2491.6099999999997 +Manufacturer#4 1375.42 39 3 3872.0299999999997 3867.0299999999997 +Manufacturer#4 1620.67 10 4 5492.7 5487.7 +Manufacturer#4 1844.92 7 5 7337.62 7332.62 +Manufacturer#5 1018.1 46 1 1018.1 1013.1 +Manufacturer#5 1464.48 23 2 2482.58 2477.58 +Manufacturer#5 1611.66 6 3 4094.24 4089.24 +Manufacturer#5 1788.73 2 4 5882.969999999999 5877.969999999999 +Manufacturer#5 1789.69 31 5 7672.66 7667.66 +PREHOOK: query: select p_mfgr, avg(p_retailprice) over(partition by p_mfgr, p_type order by p_mfgr) from part +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, avg(p_retailprice) over(partition by p_mfgr, p_type order by p_mfgr) from part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr avg_window_0 +Manufacturer#2 1800.7 +Manufacturer#4 1375.42 +Manufacturer#4 1620.67 +Manufacturer#4 1206.26 +Manufacturer#5 1788.73 +Manufacturer#1 1632.66 +Manufacturer#2 1690.68 +Manufacturer#2 1698.66 +Manufacturer#2 1701.6 +Manufacturer#3 1337.29 +Manufacturer#4 1844.92 +Manufacturer#4 1290.35 +Manufacturer#5 1018.1 +Manufacturer#5 1789.69 +Manufacturer#1 1753.76 +Manufacturer#1 1602.59 +Manufacturer#1 1173.15 +Manufacturer#1 1173.15 +Manufacturer#1 1414.42 +Manufacturer#2 2031.98 +Manufacturer#3 1922.98 +Manufacturer#3 1410.39 +Manufacturer#3 1190.27 +Manufacturer#5 1464.48 +Manufacturer#5 1611.66 +Manufacturer#3 1671.68 +PREHOOK: query: select p_mfgr, avg(p_retailprice) over(partition by p_mfgr order by p_type,p_mfgr rows between unbounded preceding and current row) from part +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here 
#### +POSTHOOK: query: select p_mfgr, avg(p_retailprice) over(partition by p_mfgr order by p_type,p_mfgr rows between unbounded preceding and current row) from part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr avg_window_0 +Manufacturer#1 1753.76 +Manufacturer#1 1693.21 +Manufacturer#1 1663.0033333333333 +Manufacturer#1 1540.54 +Manufacturer#1 1467.062 +Manufacturer#1 1458.2883333333332 +Manufacturer#2 1800.7 +Manufacturer#2 1745.69 +Manufacturer#2 1841.1200000000001 +Manufacturer#2 1805.505 +Manufacturer#2 1784.7240000000002 +Manufacturer#3 1922.98 +Manufacturer#3 1666.685 +Manufacturer#3 1668.3500000000001 +Manufacturer#3 1548.83 +Manufacturer#3 1506.522 +Manufacturer#4 1844.92 +Manufacturer#4 1610.17 +Manufacturer#4 1613.67 +Manufacturer#4 1511.8175 +Manufacturer#4 1467.5240000000001 +Manufacturer#5 1018.1 +Manufacturer#5 1241.29 +Manufacturer#5 1424.0900000000001 +Manufacturer#5 1515.25 +Manufacturer#5 1534.532 +PREHOOK: query: from (select sum(i) over (partition by ts order by i), s from over10k) tt insert overwrite table t1 select * insert overwrite table t2 select * +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +PREHOOK: Output: default@t1 +PREHOOK: Output: default@t2 +POSTHOOK: query: from (select sum(i) over (partition by ts order by i), s from over10k) tt insert overwrite table t1 select * insert overwrite table t2 select * +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +POSTHOOK: Output: default@t1 +POSTHOOK: Output: default@t2 +POSTHOOK: Lineage: t1.a1 SCRIPT [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), (over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), (over10k)over10k.FieldSchema(name:i, type:int, comment:null), (over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), (over10k)over10k.FieldSchema(name:f, type:float, comment:null), (over10k)over10k.FieldSchema(name:d, type:double, comment:null), (over10k)over10k.FieldSchema(name:bo, type:boolean, comment:null), (over10k)over10k.FieldSchema(name:s, type:string, comment:null), (over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), (over10k)over10k.FieldSchema(name:dec, type:decimal(4,2), comment:null), (over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), ] +POSTHOOK: Lineage: t1.b1 SIMPLE [(over10k)over10k.FieldSchema(name:s, type:string, comment:null), ] +POSTHOOK: Lineage: t2.a1 SCRIPT [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), (over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), (over10k)over10k.FieldSchema(name:i, type:int, comment:null), (over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), (over10k)over10k.FieldSchema(name:f, type:float, comment:null), (over10k)over10k.FieldSchema(name:d, type:double, comment:null), (over10k)over10k.FieldSchema(name:bo, type:boolean, comment:null), (over10k)over10k.FieldSchema(name:s, type:string, comment:null), (over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), (over10k)over10k.FieldSchema(name:dec, type:decimal(4,2), comment:null), (over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), ] +POSTHOOK: Lineage: t2.b1 SIMPLE [(over10k)over10k.FieldSchema(name:s, type:string, comment:null), ] +_col0 _col1 +PREHOOK: query: select * from t1 limit 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select * from t1 limit 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +t1.a1 
t1.b1 +65542 rachel thompson +131088 oscar brown +262258 wendy steinbeck +PREHOOK: query: select * from t2 limit 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: select * from t2 limit 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +t2.a1 t2.b1 +65542 rachel thompson +131088 oscar brown +262258 wendy steinbeck +PREHOOK: query: select p_mfgr, p_retailprice, p_size, +round(sum(p_retailprice) over w1 , 2) + 50.0 = round(sum(lag(p_retailprice,1,50.0)) over w1 + (last_value(p_retailprice) over w1),2) +from part +window w1 as (distribute by p_mfgr sort by p_retailprice) +limit 11 +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_retailprice, p_size, +round(sum(p_retailprice) over w1 , 2) + 50.0 = round(sum(lag(p_retailprice,1,50.0)) over w1 + (last_value(p_retailprice) over w1),2) +from part +window w1 as (distribute by p_mfgr sort by p_retailprice) +limit 11 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_retailprice p_size _c3 +Manufacturer#1 1173.15 2 true +Manufacturer#1 1173.15 2 true +Manufacturer#1 1414.42 28 true +Manufacturer#1 1602.59 6 true +Manufacturer#1 1632.66 42 true +Manufacturer#1 1753.76 34 true +Manufacturer#2 1690.68 14 true +Manufacturer#2 1698.66 25 true +Manufacturer#2 1701.6 18 true +Manufacturer#2 1800.7 40 true +Manufacturer#2 2031.98 2 true
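
The new TestVectorRowBytesContainer added earlier in this patch exercises the renamed VectorRowBytesContainer through a fill/replay cycle: rows are appended via getOuputForRowBytes()/finishRow(), then prepareForReading() switches the container to read mode and readNext()/currentBytes()/currentOffset()/currentLength() replay them in order. Below is a minimal, self-contained sketch of that same round-trip contract, assuming only the container API visible in the patch; the class name and the fixed-seed row generation are illustrative (the patch's test drives the container with RandomByteArrayStream instead), and this sketch is not part of the change itself.

package org.apache.hadoop.hive.ql.exec.vector;

import java.util.Arrays;
import java.util.Random;

import org.apache.hadoop.hive.ql.exec.vector.rowbytescontainer.VectorRowBytesContainer;
import org.apache.hadoop.hive.serde2.ByteStream.Output;
import org.junit.Test;

import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertTrue;

// Hypothetical illustration class; not part of the patch above.
public class TestVectorRowBytesContainerRoundTrip {

  @Test
  public void testWriteThenReadBack() throws Exception {
    Random random = new Random(12345);

    // Generate a handful of variable-length rows of random bytes up front
    // so the replay phase has concrete expected values to compare against.
    byte[][] rows = new byte[32][];
    for (int i = 0; i < rows.length; i++) {
      rows[i] = new byte[1 + random.nextInt(64)];
      random.nextBytes(rows[i]);
    }

    // Passing null for spillLocalDirs mirrors the usage in the patch's test.
    VectorRowBytesContainer container = new VectorRowBytesContainer(null);

    // Fill phase: append each row's bytes and mark the row boundary.
    for (byte[] row : rows) {
      Output output = container.getOuputForRowBytes();
      output.write(row);
      container.finishRow();
    }

    // Replay phase: switch to reading and verify each row comes back intact,
    // using the offset/length accessors rather than assuming a fresh array per row.
    container.prepareForReading();
    for (byte[] expected : rows) {
      assertTrue(container.readNext());
      byte[] actual = Arrays.copyOfRange(
          container.currentBytes(),
          container.currentOffset(),
          container.currentOffset() + container.currentLength());
      assertArrayEquals(expected, actual);
    }
  }
}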