diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 1934126..a6ecb37 100644
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2872,9 +2872,17 @@ private static void populateLlapDaemonVarsSet(Set<String> llapDaemonVarsSetLocal
         "1. chosen : use VectorUDFAdaptor for a small set of UDFs that were chosen for good performance\n" +
         "2. all    : use VectorUDFAdaptor for all UDFs"
     ),
-    HIVE_VECTORIZATION_PTF_ENABLED("hive.vectorized.execution.ptf.enabled", false,
+    HIVE_VECTORIZATION_PTF_ENABLED("hive.vectorized.execution.ptf.enabled", true,
         "This flag should be set to true to enable vectorized mode of PTF query execution.\n" +
-        "The default value is false."),
+        "The default value is true."),
+
+    HIVE_VECTORIZATION_PTF_MAX_MEMORY_BUFFERING_BATCH_COUNT("hive.vectorized.ptf.max.memory.buffering.batch.count", 25,
+        "Maximum number of vectorized row batches to buffer in memory for PTF.\n" +
+        "The default value is 25."),
+    HIVE_VECTORIZATION_TESTING_REDUCER_BATCH_SIZE("hive.vectorized.testing.reducer.batch.size", -1,
+        "Internal use only, used for creating small group key vectorized row batches to exercise more logic.\n" +
+        "The default value is -1, which means do not restrict for testing.",
+        true),

     HIVE_VECTORIZATION_COMPLEX_TYPES_ENABLED("hive.vectorized.complex.types.enabled", true,
         "This flag should be set to true to enable vectorization\n" +
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
index 6523f00..ac4f5ab 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
@@ -42,7 +42,7 @@
 import org.apache.hadoop.hive.ql.exec.vector.VectorHashKeyWrapper;
 import org.apache.hadoop.hive.ql.exec.vector.VectorHashKeyWrapperBatch;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter;
-import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinRowBytesContainer;
+import org.apache.hadoop.hive.ql.exec.vector.rowbytescontainer.VectorRowBytesContainer;
 import org.apache.hadoop.hive.ql.io.HiveKey;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.HiveUtils;
@@ -131,7 +131,7 @@ public long getEstimatedMemorySize() {
   BytesBytesMultiHashMap hashMap; // In memory hashMap
   KeyValueContainer sidefileKVContainer; // Stores small table key/value pairs
   ObjectContainer matchfileObjContainer; // Stores big table rows
-  VectorMapJoinRowBytesContainer matchfileRowBytesContainer;
+  VectorRowBytesContainer matchfileRowBytesContainer;
     // Stores big table rows as bytes for native vector map join.
   Path hashMapLocalPath; // Local file system path for spilled hashMap
   boolean hashMapOnDisk; // Status of hashMap.
true: on disk, false: in memory @@ -216,9 +216,9 @@ public ObjectContainer getMatchfileObjContainer() { } /* Get the big table row bytes container for native vector map join */ - public VectorMapJoinRowBytesContainer getMatchfileRowBytesContainer() { + public VectorRowBytesContainer getMatchfileRowBytesContainer() { if (matchfileRowBytesContainer == null) { - matchfileRowBytesContainer = new VectorMapJoinRowBytesContainer(spillLocalDirs); + matchfileRowBytesContainer = new VectorRowBytesContainer(spillLocalDirs); } return matchfileRowBytesContainer; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java index f6f2dd0..9de3850 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java @@ -297,7 +297,8 @@ private void initializeSourceForTag(ReduceWork redWork, int tag, ObjectInspector boolean vectorizedRecordSource = (tag == bigTablePosition) && redWork.getVectorMode(); sources[tag].init(jconf, redWork.getReducer(), vectorizedRecordSource, keyTableDesc, valueTableDesc, reader, tag == bigTablePosition, (byte) tag, - redWork.getVectorizedRowBatchCtx(), redWork.getVectorizedVertexNum()); + redWork.getVectorizedRowBatchCtx(), redWork.getVectorizedVertexNum(), + redWork.getVectorizedTestingReducerBatchSize()); ois[tag] = sources[tag].getObjectInspector(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java index 872c223..95dbdba 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java @@ -121,13 +121,22 @@ private final GroupIterator groupIterator = new GroupIterator(); private long vectorizedVertexNum; + private int vectorizedTestingReducerBatchSize; void init(JobConf jconf, Operator reducer, boolean vectorized, TableDesc keyTableDesc, TableDesc valueTableDesc, Reader reader, boolean handleGroupKey, byte tag, - VectorizedRowBatchCtx batchContext, long vectorizedVertexNum) + VectorizedRowBatchCtx batchContext, long vectorizedVertexNum, + int vectorizedTestingReducerBatchSize) throws Exception { this.vectorizedVertexNum = vectorizedVertexNum; + if (vectorizedTestingReducerBatchSize > VectorizedRowBatch.DEFAULT_SIZE) { + + // For now, we don't go higher than the default batch size unless we do more work + // to verify every vectorized operator downstream can handle a larger batch size. + vectorizedTestingReducerBatchSize = VectorizedRowBatch.DEFAULT_SIZE; + } + this.vectorizedTestingReducerBatchSize = vectorizedTestingReducerBatchSize; ObjectInspector keyObjectInspector; this.reducer = reducer; @@ -417,7 +426,10 @@ private void processVectorGroup(BytesWritable keyWritable, VectorizedBatchUtil.setRepeatingColumn(batch, i); } - final int maxSize = batch.getMaxSize(); + final int maxSize = + (vectorizedTestingReducerBatchSize > 0 ? 
+ Math.min(vectorizedTestingReducerBatchSize, batch.getMaxSize()) : + batch.getMaxSize()); Preconditions.checkState(maxSize > 0); int rowIdx = 0; int batchBytes = keyBytes.length; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRow.java index be471c6..211622d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRow.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRow.java @@ -111,6 +111,22 @@ public void init(List typeNames) throws HiveException { vectorExtractRow.init(typeInfos); } + public void init(TypeInfo[] typeInfos) + throws HiveException { + + final int size = typeInfos.length; + this.typeInfos = Arrays.copyOf(typeInfos, size); + outputColumnNums = new int[size]; + objectInspectors = new ObjectInspector[size]; + for (int i = 0; i < size; i++) { + objectInspectors[i] = + TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfos[i]); + outputColumnNums[i] = i; + } + + vectorExtractRow.init(this.typeInfos, outputColumnNums); + } + public void init(TypeInfo[] typeInfos, int[] columnMap) throws HiveException { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java index 03c09e7..4945d74 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java @@ -625,12 +625,168 @@ public static ColumnVector makeLikeColumnVector(ColumnVector source " is not supported!"); } - public static void swapColumnVector( - VectorizedRowBatch batch1, int batch1ColumnNum, - VectorizedRowBatch batch2, int batch2ColumnNum) { - ColumnVector colVector1 = batch1.cols[batch1ColumnNum]; - batch1.cols[batch1ColumnNum] = batch2.cols[batch2ColumnNum]; - batch2.cols[batch2ColumnNum] = colVector1; + private static final byte[] EMPTY_BYTES = new byte[0]; + private static final HiveIntervalDayTime emptyIntervalDayTime = new HiveIntervalDayTime(0, 0); + + public static void copyNonSelectedColumnVector( + VectorizedRowBatch sourceBatch, int sourceColumnNum, + VectorizedRowBatch targetBatch, int targetColumnNum, + int size) { + + ColumnVector sourceColVector = sourceBatch.cols[sourceColumnNum]; + ColumnVector targetColVector = targetBatch.cols[targetColumnNum]; + if (sourceColVector.noNulls && targetColVector.noNulls) { + // No isNull copying necessary. + } else if (sourceColVector.noNulls) { + + // Clear out isNull array. + targetColVector.reset(); + } else { + System.arraycopy(sourceColVector.isNull, 0, targetColVector.isNull, 0, size); + targetColVector.noNulls = false; + } + if (sourceColVector.isRepeating) { + size = 1; + targetColVector.isRepeating = true; + } else { + targetColVector.isRepeating = false; + } + + // Primitive column types ignore nulls and just copy all values. 
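+    // (The isNull flags were already reconciled above, so any values copied for
+    // null entries are harmless placeholders.)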
+    switch (sourceColVector.type) {
+    case LONG:
+      {
+        long[] sourceVector = ((LongColumnVector) sourceColVector).vector;
+        long[] targetVector = ((LongColumnVector) targetColVector).vector;
+        System.arraycopy(sourceVector, 0, targetVector, 0, size);
+      }
+      break;
+    case DOUBLE:
+      {
+        double[] sourceVector = ((DoubleColumnVector) sourceColVector).vector;
+        double[] targetVector = ((DoubleColumnVector) targetColVector).vector;
+        System.arraycopy(sourceVector, 0, targetVector, 0, size);
+      }
+      break;
+    case BYTES:
+      {
+        BytesColumnVector sourceBytesColVector = ((BytesColumnVector) sourceColVector);
+        byte[][] sourceVector = sourceBytesColVector.vector;
+        int[] sourceStart = sourceBytesColVector.start;
+        int[] sourceLength = sourceBytesColVector.length;
+
+        BytesColumnVector targetBytesColVector = ((BytesColumnVector) targetColVector);
+
+        if (sourceColVector.noNulls) {
+          for (int i = 0; i < size; i++) {
+            targetBytesColVector.setVal(i, sourceVector[i], sourceStart[i], sourceLength[i]);
+          }
+        } else {
+          boolean[] sourceIsNull = sourceColVector.isNull;
+
+          // Target isNull was copied at beginning of method.
+          for (int i = 0; i < size; i++) {
+            if (!sourceIsNull[i]) {
+              targetBytesColVector.setVal(i, sourceVector[i], sourceStart[i], sourceLength[i]);
+            } else {
+              targetBytesColVector.setRef(i, EMPTY_BYTES, 0, 0);
+            }
+          }
+        }
+      }
+      break;
+    case DECIMAL:
+      {
+        DecimalColumnVector sourceDecimalColVector = ((DecimalColumnVector) sourceColVector);
+        HiveDecimalWritable[] sourceVector = sourceDecimalColVector.vector;
+
+        DecimalColumnVector targetDecimalColVector = ((DecimalColumnVector) targetColVector);
+
+        if (sourceColVector.noNulls) {
+          for (int i = 0; i < size; i++) {
+            targetDecimalColVector.set(i, sourceVector[i]);
+          }
+        } else {
+          boolean[] sourceIsNull = sourceColVector.isNull;
+
+          // Target isNull was copied at beginning of method.
+          for (int i = 0; i < size; i++) {
+            if (!sourceIsNull[i]) {
+              targetDecimalColVector.set(i, sourceVector[i]);
+            } else {
+              targetDecimalColVector.vector[i].setFromLong(0);
+            }
+          }
+        }
+      }
+      break;
+    case TIMESTAMP:
+      {
+        TimestampColumnVector sourceTimestampColVector = ((TimestampColumnVector) sourceColVector);
+        long[] sourceTime = sourceTimestampColVector.time;
+        int[] sourceNanos = sourceTimestampColVector.nanos;
+
+        TimestampColumnVector targetTimestampColVector = ((TimestampColumnVector) targetColVector);
+        long[] targetTime = targetTimestampColVector.time;
+        int[] targetNanos = targetTimestampColVector.nanos;
+
+        if (sourceColVector.noNulls) {
+          for (int i = 0; i < size; i++) {
+            targetTime[i] = sourceTime[i];
+            targetNanos[i] = sourceNanos[i];
+          }
+        } else {
+          boolean[] sourceIsNull = sourceColVector.isNull;
+
+          // Target isNull was copied at beginning of method.
+          for (int i = 0; i < size; i++) {
+            if (!sourceIsNull[i]) {
+              targetTime[i] = sourceTime[i];
+              targetNanos[i] = sourceNanos[i];
+            } else {
+              targetTime[i] = 0;
+              targetNanos[i] = 0;
+            }
+          }
+        }
+      }
+      break;
+    case INTERVAL_DAY_TIME:
+      {
+        IntervalDayTimeColumnVector sourceIntervalDayTimeColVector = ((IntervalDayTimeColumnVector) sourceColVector);
+
+        IntervalDayTimeColumnVector targetIntervalDayTimeColVector = ((IntervalDayTimeColumnVector) targetColVector);
+
+        if (sourceColVector.noNulls) {
+          for (int i = 0; i < size; i++) {
+            targetIntervalDayTimeColVector.set(
+                i, sourceIntervalDayTimeColVector.asScratchIntervalDayTime(i));
+          }
+        } else {
+          boolean[] sourceIsNull = sourceColVector.isNull;
+
+          // Target isNull was copied at beginning of method.
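+          // Null entries get a zero interval placeholder so the target never
+          // exposes stale scratch values.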
+          for (int i = 0; i < size; i++) {
+            if (!sourceIsNull[i]) {
+              targetIntervalDayTimeColVector.set(
+                  i, sourceIntervalDayTimeColVector.asScratchIntervalDayTime(i));
+            } else {
+              targetIntervalDayTimeColVector.set(
+                  i, emptyIntervalDayTime);
+            }
+          }
+        }
+      }
+      break;
+    case STRUCT:
+    case LIST:
+    case MAP:
+    case UNION:
+      // No complex type support for now.
+    default:
+      throw new RuntimeException("Unexpected column vector type " + sourceColVector.type);
+    }
+  }

   public static void copyRepeatingColumn(VectorizedRowBatch sourceBatch, int sourceColumnNum,
@@ -646,29 +802,47 @@ public static void copyRepeatingColumn(VectorizedRowBatch sourceBatch, int sourc
       return;
     }

-    if (sourceColVector instanceof LongColumnVector) {
-      ((LongColumnVector) targetColVector).vector[0] = ((LongColumnVector) sourceColVector).vector[0];
-    } else if (sourceColVector instanceof DoubleColumnVector) {
-      ((DoubleColumnVector) targetColVector).vector[0] = ((DoubleColumnVector) sourceColVector).vector[0];
-    } else if (sourceColVector instanceof BytesColumnVector) {
-      BytesColumnVector bytesColVector = (BytesColumnVector) sourceColVector;
-      byte[] bytes = bytesColVector.vector[0];
-      final int start = bytesColVector.start[0];
-      final int length = bytesColVector.length[0];
-      if (setByValue) {
-        ((BytesColumnVector) targetColVector).setVal(0, bytes, start, length);
-      } else {
-        ((BytesColumnVector) targetColVector).setRef(0, bytes, start, length);
-      }
-    } else if (sourceColVector instanceof DecimalColumnVector) {
-      ((DecimalColumnVector) targetColVector).set(0, ((DecimalColumnVector) sourceColVector).vector[0]);
-    } else if (sourceColVector instanceof TimestampColumnVector) {
-      ((TimestampColumnVector) targetColVector).set(0, ((TimestampColumnVector) sourceColVector).asScratchTimestamp(0));
-    } else if (sourceColVector instanceof IntervalDayTimeColumnVector) {
-      ((IntervalDayTimeColumnVector) targetColVector).set(0, ((IntervalDayTimeColumnVector) sourceColVector).asScratchIntervalDayTime(0));
-    } else {
-      throw new RuntimeException("Column vector class " + sourceColVector.getClass().getName() +
-          " is not supported!");
+    switch (sourceColVector.type) {
+    case LONG:
+      ((LongColumnVector) targetColVector).vector[0] =
+          ((LongColumnVector) sourceColVector).vector[0];
+      break;
+    case DOUBLE:
+      ((DoubleColumnVector) targetColVector).vector[0] =
+          ((DoubleColumnVector) sourceColVector).vector[0];
+      break;
+    case BYTES:
+      {
+        BytesColumnVector bytesColVector = (BytesColumnVector) sourceColVector;
+        byte[] bytes = bytesColVector.vector[0];
+        final int start = bytesColVector.start[0];
+        final int length = bytesColVector.length[0];
+        if (setByValue) {
+          ((BytesColumnVector) targetColVector).setVal(0, bytes, start, length);
+        } else {
+          ((BytesColumnVector) targetColVector).setRef(0, bytes, start, length);
+        }
+      }
+      break;
+    case DECIMAL:
+      ((DecimalColumnVector) targetColVector).set(
+          0, ((DecimalColumnVector) sourceColVector).vector[0]);
+      break;
+    case TIMESTAMP:
+      ((TimestampColumnVector) targetColVector).set(
+          0, ((TimestampColumnVector) sourceColVector).asScratchTimestamp(0));
+      break;
+    case INTERVAL_DAY_TIME:
+      ((IntervalDayTimeColumnVector) targetColVector).set(
+          0, ((IntervalDayTimeColumnVector) sourceColVector).asScratchIntervalDayTime(0));
+      break;
+    case STRUCT:
+    case LIST:
+    case MAP:
+    case UNION:
+      // No complex type support for now.
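+      // Complex types fall through to the unexpected-type error below; deep
+      // ColumnVector copies are not supported yet.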
+ default: + throw new RuntimeException("Unexpected column vector type " + sourceColVector.type); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java index bab5ee4..ecf4b9a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java @@ -42,6 +42,7 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.optimized.VectorMapJoinOptimizedCreateHashTable; +import org.apache.hadoop.hive.ql.exec.vector.rowbytescontainer.VectorRowBytesContainer; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.serde2.SerDeException; @@ -481,7 +482,7 @@ private void spillSerializeRow(VectorizedRowBatch batch, int batchIndex, HybridHashTableContainer ht = (HybridHashTableContainer) mapJoinTables[posSingleVectorMapJoinSmallTable]; HashPartition hp = ht.getHashPartitions()[partitionId]; - VectorMapJoinRowBytesContainer rowBytesContainer = hp.getMatchfileRowBytesContainer(); + VectorRowBytesContainer rowBytesContainer = hp.getMatchfileRowBytesContainer(); Output output = rowBytesContainer.getOuputForRowBytes(); // int offset = output.getLength(); bigTableVectorSerializeRow.setOutputAppend(output); @@ -568,7 +569,7 @@ protected void reProcessBigTable(int partitionId) int batchCount = 0; try { - VectorMapJoinRowBytesContainer bigTable = partition.getMatchfileRowBytesContainer(); + VectorRowBytesContainer bigTable = partition.getMatchfileRowBytesContainer(); bigTable.prepareForReading(); while (bigTable.readNext()) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinRowBytesContainer.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinRowBytesContainer.java deleted file mode 100644 index fa96ae9..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinRowBytesContainer.java +++ /dev/null @@ -1,321 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.hive.ql.exec.vector.mapjoin; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.apache.hadoop.fs.FileUtil; -import org.apache.hadoop.hive.common.FileUtils; -import org.apache.hadoop.hive.serde2.ByteStream.Output; - -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; - -/** - * An eager bytes container that puts row bytes to an output stream. - */ -public class VectorMapJoinRowBytesContainer { - - private static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinRowBytesContainer.class); - - private File parentDir; - private File tmpFile; - - // We buffer in a org.apache.hadoop.hive.serde2.ByteStream.Output since that is what - // is used by VectorSerializeRow / SerializeWrite. Periodically, we flush this buffer - // to disk. - private Output output; - private int rowBeginPos; - private static final int OUTPUT_SIZE = 4096; - private static final int THRESHOLD = 8 * (OUTPUT_SIZE / 10); - private static final int INPUT_SIZE = 4096; - - private FileOutputStream fileOutputStream; - - private boolean isOpen; - - private byte[] readBuffer; - private byte[] largeRowBuffer; - private int readOffset; - private int readLength; - - private int readNextCount; - private int readNextIndex; - - private static final int MAX_READS = 256; - private byte[][] readNextBytes; - private int readNextOffsets[]; - private int readNextLengths[]; - - private byte[] currentBytes; - private int currentOffset; - private int currentLength; - - private long totalWriteLength; - private long totalReadLength; - - private FileInputStream fileInputStream; - - private final String spillLocalDirs; - - public VectorMapJoinRowBytesContainer(String spillLocalDirs) { - output = new Output(); - readBuffer = new byte[INPUT_SIZE]; - readNextBytes = new byte[MAX_READS][]; - readNextOffsets = new int[MAX_READS]; - readNextLengths = new int[MAX_READS]; - isOpen = false; - totalWriteLength = 0; - totalReadLength = 0; - this.spillLocalDirs = spillLocalDirs; - } - - private void setupOutputFileStreams() throws IOException { - parentDir = FileUtils.createLocalDirsTempFile(spillLocalDirs, "bytes-container", "", true); - parentDir.deleteOnExit(); - tmpFile = File.createTempFile("BytesContainer", ".tmp", parentDir); - LOG.debug("BytesContainer created temp file " + tmpFile.getAbsolutePath()); - tmpFile.deleteOnExit(); - - fileOutputStream = new FileOutputStream(tmpFile); - } - - private void initFile() { - try { - setupOutputFileStreams(); - } catch (IOException e) { - throw new RuntimeException("Failed to create temporary output file on disk", e); - } - } - - public Output getOuputForRowBytes() { - if (!isOpen) { - initFile(); - isOpen = true; - } - // Reserve space for the int length. 
- output.reserve(4); - rowBeginPos = output.getLength(); - return output; - } - - public void finishRow() throws IOException { - int length = output.getLength() - rowBeginPos; - output.writeInt(rowBeginPos - 4, length); - if (output.getLength() > THRESHOLD) { - fileOutputStream.write(output.getData(), 0, output.getLength()); - totalWriteLength += output.getLength(); - output.reset(); - } - } - - public void prepareForReading() throws IOException { - if (!isOpen) { - return; - } - if (output.getLength() > 0) { - fileOutputStream.write(output.getData(), 0, output.getLength()); - totalWriteLength += output.getLength(); - fileOutputStream.flush(); - output.reset(); - } - if (fileInputStream != null) { - fileInputStream.close(); - } - fileInputStream = new FileInputStream(tmpFile); - readNextIndex = 0; - readNextCount = 0; - } - - private int readInt() { - int value = (((readBuffer[readOffset] & 0xFF) << 24) | - ((readBuffer[readOffset + 1] & 0xFF) << 16) | - ((readBuffer[readOffset + 2] & 0xFF) << 8) | - ((readBuffer[readOffset + 3] & 0xFF))); - readOffset += 4; - return value; - } - - // Call when nextReadIndex == nextReadCount. - private void bufferedRead() throws IOException { - - // Reset for reading. - readNextIndex = 0; - - // Reset for filling. - readNextCount = 0; - - if (readOffset < readLength) { - // Move unprocessed remainder to beginning of buffer. - int unprocessLength = readLength - readOffset; - System.arraycopy(readBuffer, readOffset, readBuffer, 0, unprocessLength); - - int maxReadLength = readBuffer.length - unprocessLength; - int partialReadLength = fileInputStream.read(readBuffer, unprocessLength, maxReadLength); - if (partialReadLength == -1) { - partialReadLength = 0; - } - totalReadLength += partialReadLength; - readLength = unprocessLength + partialReadLength; - readOffset = 0; - } else { - readOffset = 0; - readLength = fileInputStream.read(readBuffer, 0, readBuffer.length); - if (readLength == -1) { - readLength = 0; - } - totalReadLength += readLength; - } - if (readLength == 0) { - return; - } - if (readLength < 0) { - throw new IOException("Negative read length"); - } - - // Get length word. - if (readLength < 4) { - throw new IOException("Expecting 4 byte length"); - } - - while (true) { - // Use Input class to read length. - int saveReadOffset = readOffset; - int rowLength = readInt(); - if (rowLength < 0) { - throw new IOException("Negative row length"); - } - int remainingLength = readLength - readOffset; - if (remainingLength < rowLength) { - if (readNextCount > 0) { - // Leave this one for the next round. - readOffset = saveReadOffset; - break; - } - - // Buffer needed to bridge. 
- if (largeRowBuffer == null || largeRowBuffer.length < rowLength) { - int newLargeBufferLength = Math.max(Integer.highestOneBit(rowLength) << 1, INPUT_SIZE); - largeRowBuffer = new byte[newLargeBufferLength]; - } - System.arraycopy(readBuffer, readOffset, largeRowBuffer, 0, remainingLength); - int expectedPartialLength = rowLength - remainingLength; - int partialReadLength = fileInputStream.read(largeRowBuffer, remainingLength, expectedPartialLength); - if (partialReadLength == -1) { - throw new IOException("Unexpected EOF (total write length " + totalWriteLength + - ", total read length " + totalReadLength + ", read length " + - expectedPartialLength + ")"); - } - - if (expectedPartialLength != partialReadLength) { - throw new IOException("Unable to read a complete row of length " + rowLength + - " (total write length " + totalWriteLength + - ", total read length " + totalReadLength + ", read length " + - expectedPartialLength + ", actual length " + partialReadLength + ")"); - } - totalReadLength += partialReadLength; - - readNextBytes[readNextCount] = largeRowBuffer; - readNextOffsets[readNextCount] = 0; - readNextLengths[readNextCount] = rowLength; - - // Indicate we used the last row's bytes for large buffer. - readOffset = readLength; - readNextCount++; - break; - } - - readNextBytes[readNextCount] = readBuffer; - readNextOffsets[readNextCount] = readOffset; - readNextLengths[readNextCount] = rowLength; - readOffset += rowLength; - readNextCount++; - - if (readNextCount >= readNextBytes.length){ - break; - } - if (readLength - readOffset < 4) { - // Handle in next round. - break; - } - } - } - - public boolean readNext() throws IOException { - if (!isOpen) { - return false; - } - if (readNextIndex >= readNextCount) { - bufferedRead(); - // Any more left? 
- if (readNextIndex >= readNextCount) { - return false; - } - } - - currentBytes = readNextBytes[readNextIndex]; - currentOffset = readNextOffsets[readNextIndex]; - currentLength = readNextLengths[readNextIndex]; - - readNextIndex++; - return true; - } - - public byte[] currentBytes() { - return currentBytes; - } - - public int currentOffset() { - return currentOffset; - } - - public int currentLength() { - return currentLength; - } - - public void clear() { - if (fileInputStream != null) { - try { - fileInputStream.close(); - } catch (Throwable ignored) { - } - fileInputStream = null; - } - if (fileOutputStream != null) { - try { - fileOutputStream.close(); - } catch (Throwable ignored) { - } - fileOutputStream = null; - } - - if (parentDir != null) { - try { - FileUtil.fullyDelete(parentDir); - } catch (Throwable ignored) { - } - } - parentDir = null; - tmpFile = null; - isOpen = false; - totalWriteLength = 0; - } - -} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFGroupBatches.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFGroupBatches.java index a843f48..0c611a8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFGroupBatches.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFGroupBatches.java @@ -18,17 +18,26 @@ package org.apache.hadoop.hive.ql.exec.vector.ptf; +import java.io.IOException; import java.util.ArrayList; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorDeserializeRow; +import org.apache.hadoop.hive.ql.exec.vector.VectorSerializeRow; import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.rowbytescontainer.VectorRowBytesContainer; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.HiveUtils; +import org.apache.hadoop.hive.serde2.ByteStream.Output; +import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead; +import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import com.google.common.base.Preconditions; @@ -42,10 +51,15 @@ private static final String CLASS_NAME = VectorPTFGroupBatches.class.getName(); private static final Log LOG = LogFactory.getLog(CLASS_NAME); + private Configuration hconf; + private VectorPTFEvaluatorBase[] evaluators; - private int[] outputColumnMap; + private int[] outputProjectionColumnMap; private int[] keyInputColumnMap; + + private int bufferedColumnCount; private int[] bufferedColumnMap; + private TypeInfo[] bufferedTypeInfos; private ArrayList bufferedBatches; @@ -54,31 +68,101 @@ private int allocatedBufferedBatchCount; private int currentBufferedBatchCount; - public VectorPTFGroupBatches() { + private int spillLimitBufferedBatchCount; + private boolean didSpillToDisk; + private String spillLocalDirs; + private long spillRowCount; + private VectorRowBytesContainer spillRowBytesContainer; + + private transient VectorSerializeRow bufferedBatchVectorSerializeRow; + private transient VectorDeserializeRow 
bufferedBatchVectorDeserializeRow;
+
+  public VectorPTFGroupBatches(Configuration hconf, int vectorizedPTFMaxMemoryBufferingBatchCount) {
+    this.hconf = hconf;
     allocatedBufferedBatchCount = 0;
     currentBufferedBatchCount = 0;
+
+    spillLocalDirs = HiveUtils.getLocalDirList(hconf);
+
+    // Cannot be 0.
+    spillLimitBufferedBatchCount = Math.max(1, vectorizedPTFMaxMemoryBufferingBatchCount);
+
+    didSpillToDisk = false;
+    spillRowBytesContainer = null;
+    bufferedBatchVectorSerializeRow = null;
+    bufferedBatchVectorDeserializeRow = null;
   }

-  public void init(VectorPTFEvaluatorBase[] evaluators, int[] outputColumnMap,
-      int[] keyInputColumnMap, int[] nonKeyInputColumnMap, int[] streamingColumnMap,
+  public void init(
+      TypeInfo[] reducerBatchTypeInfos,
+      VectorPTFEvaluatorBase[] evaluators,
+      int[] outputProjectionColumnMap, TypeInfo[] outputTypeInfos,
+      int[] keyInputColumnMap, int[] nonKeyInputColumnMap, int[] streamingEvaluatorNums,
       VectorizedRowBatch overflowBatch) {
     this.evaluators = evaluators;
-    this.outputColumnMap = outputColumnMap;
+    this.outputProjectionColumnMap = outputProjectionColumnMap;
     this.keyInputColumnMap = keyInputColumnMap;
+
+    /*
+     * If we have more than one group key batch, we will buffer their contents.
+     * We don't buffer the key columns since they are a constant for the group key.
+     *
+     * We buffer the non-key input columns. And, we buffer any streaming columns that will already
+     * have their output values.
+     */
     final int nonKeyInputColumnCount = nonKeyInputColumnMap.length;
-    final int streamingColumnCount = streamingColumnMap.length;
-    final int bufferedColumnCount = nonKeyInputColumnCount + streamingColumnCount;
+    final int streamingEvaluatorCount = streamingEvaluatorNums.length;
+    bufferedColumnCount = nonKeyInputColumnCount + streamingEvaluatorCount;
     bufferedColumnMap = new int[bufferedColumnCount];
+    bufferedTypeInfos = new TypeInfo[bufferedColumnCount];
     for (int i = 0; i < nonKeyInputColumnCount; i++) {
-      bufferedColumnMap[i] = nonKeyInputColumnMap[i];
+      final int columnNum = nonKeyInputColumnMap[i];
+      bufferedColumnMap[i] = columnNum;
+      bufferedTypeInfos[i] = reducerBatchTypeInfos[columnNum];
     }
-    for (int i = nonKeyInputColumnCount; i < bufferedColumnCount; i++) {
-      bufferedColumnMap[i] = streamingColumnMap[i - nonKeyInputColumnCount];
+
+    for (int i = 0; i < streamingEvaluatorCount; i++) {
+      final int streamingEvaluatorNum = streamingEvaluatorNums[i];
+      final int bufferedMapIndex = nonKeyInputColumnCount + i;
+      bufferedColumnMap[bufferedMapIndex] = outputProjectionColumnMap[streamingEvaluatorNum];
+      bufferedTypeInfos[bufferedMapIndex] = outputTypeInfos[streamingEvaluatorNum];
     }
     this.overflowBatch = overflowBatch;
     bufferedBatches = new ArrayList<VectorizedRowBatch>(0);
   }

+  private VectorRowBytesContainer getSpillRowBytesContainer() throws HiveException {
+    if (spillRowBytesContainer == null) {
+      spillRowBytesContainer = new VectorRowBytesContainer(spillLocalDirs);
+
+      if (bufferedBatchVectorSerializeRow == null) {
+        bufferedBatchVectorSerializeRow =
+            new VectorSerializeRow<LazyBinarySerializeWrite>(
+                new LazyBinarySerializeWrite(bufferedColumnMap.length));
+
+        // Serialize just the columns of a buffered batch, which has only the non-key inputs and
+        // streamed column outputs.
+        bufferedBatchVectorSerializeRow.init(bufferedTypeInfos);
+
+        bufferedBatchVectorDeserializeRow =
+            new VectorDeserializeRow<LazyBinaryDeserializeRead>(
+                new LazyBinaryDeserializeRead(
+                    bufferedTypeInfos,
+                    /* useExternalBuffer */ true));
+
+        // Deserialize the fields into the *overflow* batch using the buffered batch column map.
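+        // (Fields were serialized in buffered-batch column order; initializing with
+        // bufferedColumnMap routes each field back to its original column number.)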
+ bufferedBatchVectorDeserializeRow.init(bufferedColumnMap); + } + } + return spillRowBytesContainer; + } + + private void releaseSpillRowBytesContainer() { + spillRowBytesContainer.clear(); + spillRowBytesContainer = null; + } + public void evaluateStreamingGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) { // Streaming evaluators fill in their results during the evaluate call. @@ -128,32 +212,101 @@ private void forwardBufferedBatches(VectorPTFOperator vecPTFOperator, int index) throws HiveException { VectorizedRowBatch bufferedBatch = bufferedBatches.get(index); - final int size = bufferedColumnMap.length; - for (int i = 0; i < size; i++) { + final int size = bufferedBatch.size; + final int bufferedColumnCount = bufferedColumnMap.length; + for (int i = 0; i < bufferedColumnCount; i++) { - // Swap ColumnVectors with overflowBatch. We remember buffered columns compactly in the + // Copy ColumnVectors to overflowBatch. We remember buffered columns compactly in the // buffered VRBs without other columns or scratch columns. - VectorizedBatchUtil.swapColumnVector( - bufferedBatch, i, overflowBatch, bufferedColumnMap[i]); + VectorizedBatchUtil.copyNonSelectedColumnVector( + bufferedBatch, i, overflowBatch, bufferedColumnMap[i], size); + } + overflowBatch.size = size; + fillGroupResults(overflowBatch); + vecPTFOperator.forward(overflowBatch, null); + } + + private void forwardSpilledBatches(VectorPTFOperator vecPTFOperator, VectorizedRowBatch lastBatch) + throws HiveException { + + overflowBatch.reset(); + copyPartitionAndOrderColumnsToOverflow(lastBatch); + + long spillRowsRead = 0; + try { + VectorRowBytesContainer rowBytesContainer = getSpillRowBytesContainer(); + rowBytesContainer.prepareForReading(); + + while (rowBytesContainer.readNext()) { + + byte[] bytes = rowBytesContainer.currentBytes(); + int offset = rowBytesContainer.currentOffset(); + int length = rowBytesContainer.currentLength(); + + bufferedBatchVectorDeserializeRow.setBytes(bytes, offset, length); + try { + bufferedBatchVectorDeserializeRow.deserialize(overflowBatch, overflowBatch.size); + } catch (Exception e) { + throw new HiveException( + "\nDeserializeRead detail: " + + bufferedBatchVectorDeserializeRow.getDetailedReadPositionString(), + e); + } + overflowBatch.size++; + spillRowsRead++; + + if (overflowBatch.size == VectorizedRowBatch.DEFAULT_SIZE) { + + fillGroupResults(overflowBatch); + vecPTFOperator.forward(overflowBatch, null); - overflowBatch.size = bufferedBatch.size; - fillGroupResults(overflowBatch); - vecPTFOperator.forward(overflowBatch, null); + overflowBatch.reset(); + copyPartitionAndOrderColumnsToOverflow(lastBatch); + } + } + // Process the row batch that has less than DEFAULT_SIZE rows + if (overflowBatch.size > 0) { + + fillGroupResults(overflowBatch); + vecPTFOperator.forward(overflowBatch, null); + + overflowBatch.reset(); + copyPartitionAndOrderColumnsToOverflow(lastBatch); + } + Preconditions.checkState(spillRowsRead == spillRowCount); + + // For now, throw away file. + releaseSpillRowBytesContainer(); + + } catch (Exception e) { + throw new HiveException(e); + } + } + + private void copyPartitionAndOrderColumnsToOverflow(VectorizedRowBatch lastBatch) { + + // Set partition and order columns in overflowBatch. + // We can set by ref since our last batch is held by us. 
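+    // The group key columns are repeating within a group, so copying the single
+    // repeated value per key column is sufficient.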
+ final int keyInputColumnCount = keyInputColumnMap.length; + for (int i = 0; i < keyInputColumnCount; i++) { + final int keyColumnNum = keyInputColumnMap[i]; + Preconditions.checkState(overflowBatch.cols[keyColumnNum] != null); + VectorizedBatchUtil.copyRepeatingColumn( + lastBatch, keyColumnNum, overflowBatch, keyColumnNum, /* setByValue */ false); } } public void fillGroupResultsAndForward(VectorPTFOperator vecPTFOperator, VectorizedRowBatch lastBatch) throws HiveException { - if (currentBufferedBatchCount > 0) { - - // Set partition and order columns in overflowBatch. - // We can set by ref since our last batch is held by us. - final int keyInputColumnCount = keyInputColumnMap.length; - for (int i = 0; i < keyInputColumnCount; i++) { - VectorizedBatchUtil.copyRepeatingColumn(lastBatch, i, overflowBatch, i, /* setByValue */ false); - } + if (didSpillToDisk) { + forwardSpilledBatches(vecPTFOperator, lastBatch); + didSpillToDisk = false; + } + if (currentBufferedBatchCount > 0) { + overflowBatch.reset(); + copyPartitionAndOrderColumnsToOverflow(lastBatch); for (int i = 0; i < currentBufferedBatchCount; i++) { forwardBufferedBatches(vecPTFOperator, i); } @@ -167,8 +320,8 @@ public void fillGroupResultsAndForward(VectorPTFOperator vecPTFOperator, int originalProjectionSize = lastBatch.projectionSize; // Project with the output of our operator. - lastBatch.projectionSize = outputColumnMap.length; - lastBatch.projectedColumns = outputColumnMap; + lastBatch.projectionSize = outputProjectionColumnMap.length; + lastBatch.projectedColumns = outputProjectionColumnMap; vecPTFOperator.forward(lastBatch, null); @@ -197,20 +350,60 @@ private VectorizedRowBatch newBufferedBatch(VectorizedRowBatch batch) throws Hiv public void bufferGroupBatch(VectorizedRowBatch batch) throws HiveException { - final int bufferedColumnCount = bufferedColumnMap.length; - if (allocatedBufferedBatchCount <= currentBufferedBatchCount) { - VectorizedRowBatch newBatch = newBufferedBatch(batch); - bufferedBatches.add(newBatch); - allocatedBufferedBatchCount++; - } + try { + // When we've buffered the max allowed, spill the oldest one to make space. + if (currentBufferedBatchCount >= spillLimitBufferedBatchCount) { - VectorizedRowBatch bufferedBatch = bufferedBatches.get(currentBufferedBatchCount++); + VectorRowBytesContainer rowBytesContainer = getSpillRowBytesContainer(); - for (int i = 0; i < bufferedColumnCount; i++) { - VectorizedBatchUtil.swapColumnVector( - batch, bufferedColumnMap[i], bufferedBatch, i); - } + if (!didSpillToDisk) { + // UNDONE: Don't reuse for now. + // rowBytesContainer.resetWrite(); + didSpillToDisk = true; + spillRowCount = 0; + } + + // Grab the oldest in-memory buffered batch and dump it to disk. + VectorizedRowBatch oldestBufferedBatch = bufferedBatches.remove(0); + + final boolean selectedInUse = oldestBufferedBatch.selectedInUse; + int[] selected = oldestBufferedBatch.selected; + final int size = oldestBufferedBatch.size; + for (int logicalIndex = 0; logicalIndex < size; logicalIndex++) { + final int batchIndex = (selectedInUse ? selected[logicalIndex] : logicalIndex); + + Output output = rowBytesContainer.getOuputForRowBytes(); + bufferedBatchVectorSerializeRow.setOutputAppend(output); + bufferedBatchVectorSerializeRow.serializeWrite(oldestBufferedBatch, batchIndex); + rowBytesContainer.finishRow(); + spillRowCount++; + } - bufferedBatch.size = batch.size; + // Put now available buffered batch at end. 
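+        // Its ColumnVectors get reused for the next group batch we buffer.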
+ oldestBufferedBatch.reset(); + bufferedBatches.add(oldestBufferedBatch); + currentBufferedBatchCount--; + } + + final int bufferedColumnCount = bufferedColumnMap.length; + if (allocatedBufferedBatchCount <= currentBufferedBatchCount) { + VectorizedRowBatch newBatch = newBufferedBatch(batch); + bufferedBatches.add(newBatch); + allocatedBufferedBatchCount++; + } + + VectorizedRowBatch bufferedBatch = bufferedBatches.get(currentBufferedBatchCount++); + + // Copy critical columns. + final int size = batch.size; + for (int i = 0; i < bufferedColumnCount; i++) { + VectorizedBatchUtil.copyNonSelectedColumnVector( + batch, bufferedColumnMap[i], bufferedBatch, i, size); + } + + bufferedBatch.size = size; + } catch (IOException e) { + throw new HiveException(e); + } } } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFOperator.java index 7522624..0d72ba8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFOperator.java @@ -97,10 +97,9 @@ * PTF vector expressions. */ - // This is map of which vectorized row batch columns are the input columns and the group value - // (aggregation) output columns. - // And, their types. - private int[] outputColumnMap; + private TypeInfo[] reducerBatchTypeInfos; + + private int[] outputProjectionColumnMap; private String[] outputColumnNames; private TypeInfo[] outputTypeInfos; @@ -135,7 +134,7 @@ private transient VectorPTFEvaluatorBase[] evaluators; - private transient int[] streamingColumnMap; + private transient int[] streamingEvaluatorNums; private transient boolean allEvaluatorsAreStreaming; @@ -179,11 +178,13 @@ public VectorPTFOperator(CompilationOpContext ctx, vectorPTFInfo = vectorDesc.getVectorPTFInfo(); this.vContext = vContext; + reducerBatchTypeInfos = vectorDesc.getReducerBatchTypeInfos(); + isPartitionOrderBy = vectorDesc.getIsPartitionOrderBy(); outputColumnNames = vectorDesc.getOutputColumnNames(); outputTypeInfos = vectorDesc.getOutputTypeInfos(); - outputColumnMap = vectorPTFInfo.getOutputColumnMap(); + outputProjectionColumnMap = vectorPTFInfo.getOutputColumnMap(); /* * Create a new vectorization context to create a new projection, but keep @@ -222,7 +223,7 @@ protected void setupVOutContext() { final int count = outputColumnNames.length; for (int i = 0; i < count; ++i) { String columnName = outputColumnNames[i]; - int outputColumn = outputColumnMap[i]; + int outputColumn = outputProjectionColumnMap[i]; vOutContext.addProjectionColumn(columnName, outputColumn); } } @@ -255,8 +256,8 @@ protected VectorizedRowBatch setupOverflowBatch() throws HiveException { overflowBatch = new VectorizedRowBatch(totalNumColumns); // First, just allocate just the output columns we will be using. 
- for (int i = 0; i < outputColumnMap.length; i++) { - int outputColumn = outputColumnMap[i]; + for (int i = 0; i < outputProjectionColumnMap.length; i++) { + int outputColumn = outputProjectionColumnMap[i]; String typeName = outputTypeInfos[i].getTypeName(); allocateOverflowBatchColumnVector(overflowBatch, outputColumn, typeName); } @@ -267,8 +268,8 @@ protected VectorizedRowBatch setupOverflowBatch() throws HiveException { allocateOverflowBatchColumnVector(overflowBatch, outputColumn++, typeName); } - overflowBatch.projectedColumns = outputColumnMap; - overflowBatch.projectionSize = outputColumnMap.length; + overflowBatch.projectedColumns = outputProjectionColumnMap; + overflowBatch.projectionSize = outputProjectionColumnMap.length; overflowBatch.reset(); @@ -311,18 +312,26 @@ protected void initializeOp(Configuration hconf) throws HiveException { evaluators = VectorPTFDesc.getEvaluators(vectorDesc, vectorPTFInfo); - streamingColumnMap = VectorPTFDesc.getStreamingColumnMap(evaluators); + streamingEvaluatorNums = VectorPTFDesc.getStreamingEvaluatorNums(evaluators); - allEvaluatorsAreStreaming = (streamingColumnMap.length == evaluatorCount); + allEvaluatorsAreStreaming = (streamingEvaluatorNums.length == evaluatorCount); /* * Setup the overflow batch. */ overflowBatch = setupOverflowBatch(); - groupBatches = new VectorPTFGroupBatches(); + groupBatches = new VectorPTFGroupBatches( + hconf, vectorDesc.getVectorizedPTFMaxMemoryBufferingBatchCount()); groupBatches.init( - evaluators, outputColumnMap, keyInputColumnMap, nonKeyInputColumnMap, streamingColumnMap, overflowBatch); + reducerBatchTypeInfos, + evaluators, + outputProjectionColumnMap, + outputTypeInfos, + keyInputColumnMap, + nonKeyInputColumnMap, + streamingEvaluatorNums, + overflowBatch); isFirstPartition = true; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/rowbytescontainer/VectorRowBytesContainer.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/rowbytescontainer/VectorRowBytesContainer.java new file mode 100644 index 0000000..4b801cc --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/rowbytescontainer/VectorRowBytesContainer.java @@ -0,0 +1,331 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.exec.vector.rowbytescontainer; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.hive.common.FileUtils; +import org.apache.hadoop.hive.serde2.ByteStream.Output; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; + +/** + * An eager bytes container that puts row bytes to an output stream. 
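+ * Each row is written with a 4 byte length prefix so the rows can be replayed
+ * in write order after being spilled to a local temp file.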
+ */ +public class VectorRowBytesContainer { + + private static final Logger LOG = LoggerFactory.getLogger(VectorRowBytesContainer.class); + + private File parentDir; + private File tmpFile; + + // We buffer in a org.apache.hadoop.hive.serde2.ByteStream.Output since that is what + // is used by VectorSerializeRow / SerializeWrite. Periodically, we flush this buffer + // to disk. + private Output output; + private int rowBeginPos; + private static final int OUTPUT_SIZE = 4096; + private static final int THRESHOLD = 8 * (OUTPUT_SIZE / 10); + private static final int INPUT_SIZE = 4096; + + private FileOutputStream fileOutputStream; + + private boolean isOpen; + + private byte[] readBuffer; + private byte[] largeRowBuffer; + private int readOffset; + private int readLength; + + private int readNextCount; + private int readNextIndex; + + private static final int MAX_READS = 256; + private byte[][] readNextBytes; + private int readNextOffsets[]; + private int readNextLengths[]; + + private byte[] currentBytes; + private int currentOffset; + private int currentLength; + + private long totalWriteLength; + private long totalReadLength; + + private FileInputStream fileInputStream; + + private final String spillLocalDirs; + + public VectorRowBytesContainer(String spillLocalDirs) { + output = new Output(); + readBuffer = new byte[INPUT_SIZE]; + readNextBytes = new byte[MAX_READS][]; + readNextOffsets = new int[MAX_READS]; + readNextLengths = new int[MAX_READS]; + isOpen = false; + totalWriteLength = 0; + totalReadLength = 0; + this.spillLocalDirs = spillLocalDirs; + } + + private void setupOutputFileStreams() throws IOException { + parentDir = FileUtils.createLocalDirsTempFile(spillLocalDirs, "bytes-container", "", true); + parentDir.deleteOnExit(); + tmpFile = File.createTempFile("BytesContainer", ".tmp", parentDir); + LOG.debug("BytesContainer created temp file " + tmpFile.getAbsolutePath()); + tmpFile.deleteOnExit(); + + fileOutputStream = new FileOutputStream(tmpFile); + } + + private void initFile() { + try { + setupOutputFileStreams(); + } catch (IOException e) { + throw new RuntimeException("Failed to create temporary output file on disk", e); + } + } + + public Output getOuputForRowBytes() { + if (!isOpen) { + initFile(); + isOpen = true; + } + // Reserve space for the int length. + output.reserve(4); + rowBeginPos = output.getLength(); + return output; + } + + public void finishRow() throws IOException { + int length = output.getLength() - rowBeginPos; + output.writeInt(rowBeginPos - 4, length); + if (output.getLength() > THRESHOLD) { + fileOutputStream.write(output.getData(), 0, output.getLength()); + totalWriteLength += output.getLength(); + output.reset(); + } + } + + public void prepareForReading() throws IOException { + if (!isOpen) { + return; + } + if (output.getLength() > 0) { + fileOutputStream.write(output.getData(), 0, output.getLength()); + totalWriteLength += output.getLength(); + fileOutputStream.flush(); + output.reset(); + } + if (fileInputStream != null) { + fileInputStream.close(); + } + fileInputStream = new FileInputStream(tmpFile); + readNextIndex = 0; + readNextCount = 0; + } + + private int readInt() { + int value = (((readBuffer[readOffset] & 0xFF) << 24) | + ((readBuffer[readOffset + 1] & 0xFF) << 16) | + ((readBuffer[readOffset + 2] & 0xFF) << 8) | + ((readBuffer[readOffset + 3] & 0xFF))); + readOffset += 4; + return value; + } + + // Call when nextReadIndex == nextReadCount. + private void bufferedRead() throws IOException { + + // Reset for reading. 
+ readNextIndex = 0; + + // Reset for filling. + readNextCount = 0; + + if (readOffset < readLength) { + // Move unprocessed remainder to beginning of buffer. + int unprocessLength = readLength - readOffset; + System.arraycopy(readBuffer, readOffset, readBuffer, 0, unprocessLength); + + int maxReadLength = readBuffer.length - unprocessLength; + int partialReadLength = fileInputStream.read(readBuffer, unprocessLength, maxReadLength); + if (partialReadLength == -1) { + partialReadLength = 0; + } + totalReadLength += partialReadLength; + readLength = unprocessLength + partialReadLength; + readOffset = 0; + } else { + readOffset = 0; + readLength = fileInputStream.read(readBuffer, 0, readBuffer.length); + if (readLength == -1) { + readLength = 0; + } + totalReadLength += readLength; + } + if (readLength == 0) { + return; + } + if (readLength < 0) { + throw new IOException("Negative read length"); + } + + // Get length word. + if (readLength < 4) { + throw new IOException("Expecting 4 byte length"); + } + + while (true) { + // Use Input class to read length. + int saveReadOffset = readOffset; + int rowLength = readInt(); + if (rowLength < 0) { + throw new IOException("Negative row length"); + } + int remainingLength = readLength - readOffset; + if (remainingLength < rowLength) { + if (readNextCount > 0) { + // Leave this one for the next round. + readOffset = saveReadOffset; + break; + } + + // Buffer needed to bridge. + if (largeRowBuffer == null || largeRowBuffer.length < rowLength) { + int newLargeBufferLength = Math.max(Integer.highestOneBit(rowLength) << 1, INPUT_SIZE); + largeRowBuffer = new byte[newLargeBufferLength]; + } + System.arraycopy(readBuffer, readOffset, largeRowBuffer, 0, remainingLength); + int expectedPartialLength = rowLength - remainingLength; + int partialReadLength = fileInputStream.read(largeRowBuffer, remainingLength, expectedPartialLength); + if (partialReadLength == -1) { + throw new IOException("Unexpected EOF (total write length " + totalWriteLength + + ", total read length " + totalReadLength + ", read length " + + expectedPartialLength + ")"); + } + + if (expectedPartialLength != partialReadLength) { + throw new IOException("Unable to read a complete row of length " + rowLength + + " (total write length " + totalWriteLength + + ", total read length " + totalReadLength + ", read length " + + expectedPartialLength + ", actual length " + partialReadLength + ")"); + } + totalReadLength += partialReadLength; + + readNextBytes[readNextCount] = largeRowBuffer; + readNextOffsets[readNextCount] = 0; + readNextLengths[readNextCount] = rowLength; + + // Indicate we used the last row's bytes for large buffer. + readOffset = readLength; + readNextCount++; + break; + } + + readNextBytes[readNextCount] = readBuffer; + readNextOffsets[readNextCount] = readOffset; + readNextLengths[readNextCount] = rowLength; + readOffset += rowLength; + readNextCount++; + + if (readNextCount >= readNextBytes.length){ + break; + } + if (readLength - readOffset < 4) { + // Handle in next round. + break; + } + } + } + + public boolean readNext() throws IOException { + if (!isOpen) { + return false; + } + if (readNextIndex >= readNextCount) { + bufferedRead(); + // Any more left? 
+ if (readNextIndex >= readNextCount) { + return false; + } + } + + currentBytes = readNextBytes[readNextIndex]; + currentOffset = readNextOffsets[readNextIndex]; + currentLength = readNextLengths[readNextIndex]; + + readNextIndex++; + return true; + } + + public byte[] currentBytes() { + return currentBytes; + } + + public int currentOffset() { + return currentOffset; + } + + public int currentLength() { + return currentLength; + } + + public void resetWrite() throws IOException { + if (!isOpen) { + return; + } + + // Truncate by re-opening FileOutputStream. + fileOutputStream.close(); + fileOutputStream = new FileOutputStream(tmpFile); + } + + public void clear() { + if (fileInputStream != null) { + try { + fileInputStream.close(); + } catch (Throwable ignored) { + } + fileInputStream = null; + } + if (fileOutputStream != null) { + try { + fileOutputStream.close(); + } catch (Throwable ignored) { + } + fileOutputStream = null; + } + + if (parentDir != null) { + try { + FileUtil.fullyDelete(parentDir); + } catch (Throwable ignored) { + } + } + parentDir = null; + tmpFile = null; + isOpen = false; + totalWriteLength = 0; + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index a02863b..28400c7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -294,6 +294,8 @@ private boolean isVectorizationGroupByComplexTypesEnabled; private boolean isVectorizedRowIdentifierEnabled; private Collection> rowDeserializeInputFormatExcludes; + private int vectorizedPTFMaxMemoryBufferingBatchCount; + private int vectorizedTestingReducerBatchSize; private boolean isSchemaEvolution; @@ -1278,6 +1280,7 @@ private void convertReduceWork(ReduceWork reduceWork) throws SemanticException { vectorTaskColumnInfo.assume(); reduceWork.setVectorizedVertexNum(++vectorizedVertexNum); + reduceWork.setVectorizedTestingReducerBatchSize(vectorizedTestingReducerBatchSize); boolean ret; try { @@ -1392,7 +1395,8 @@ private boolean validateReduceWork(ReduceWork reduceWork, } // Now check the reduce operator tree. Map opRules = new LinkedHashMap(); - ReduceWorkValidationNodeProcessor vnp = new ReduceWorkValidationNodeProcessor(); + ReduceWorkValidationNodeProcessor vnp = + new ReduceWorkValidationNodeProcessor(vectorTaskColumnInfo); addReduceWorkRules(opRules, vnp); Dispatcher disp = new DefaultRuleDispatcher(vnp, opRules, null); GraphWalker ogw = new DefaultGraphWalker(disp); @@ -1542,6 +1546,14 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, class ReduceWorkValidationNodeProcessor implements NodeProcessor { + private final VectorTaskColumnInfo vectorTaskColumnInfo; + private final TypeInfo[] reducerBatchTypeInfos; + + public ReduceWorkValidationNodeProcessor(VectorTaskColumnInfo vectorTaskColumnInfo) { + this.vectorTaskColumnInfo = vectorTaskColumnInfo; + reducerBatchTypeInfos = vectorTaskColumnInfo.allTypeInfos.toArray(new TypeInfo[0]); + } + // Children of Vectorized GROUPBY that outputs rows instead of vectorized row batchs. 
protected final Set> nonVectorizedOps = new HashSet>(); @@ -1563,7 +1575,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, return new Boolean(true); } currentOperator = op; - boolean ret = validateReduceWorkOperator(op); + boolean ret = validateReduceWorkOperator(op, reducerBatchTypeInfos); if (!ret) { return new Boolean(false); } @@ -1865,6 +1877,13 @@ public PhysicalContext resolve(PhysicalContext physicalContext) throws SemanticE HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_ROW_IDENTIFIER_ENABLED); + vectorizedPTFMaxMemoryBufferingBatchCount = + HiveConf.getIntVar(hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_PTF_MAX_MEMORY_BUFFERING_BATCH_COUNT); + vectorizedTestingReducerBatchSize = + HiveConf.getIntVar(hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_TESTING_REDUCER_BATCH_SIZE); + isSchemaEvolution = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_SCHEMA_EVOLUTION); @@ -1951,7 +1970,8 @@ boolean validateMapWorkOperator(Operator op, MapWork mWo return ret; } - boolean validateReduceWorkOperator(Operator op) { + boolean validateReduceWorkOperator(Operator op, + TypeInfo[] reducerBatchTypeInfos) { boolean ret; switch (op.getType()) { case MAPJOIN: @@ -1996,7 +2016,8 @@ boolean validateReduceWorkOperator(Operator op) { validateSparkHashTableSinkOperator((SparkHashTableSinkOperator) op); break; case PTF: - ret = validatePTFOperator((PTFOperator) op); + // PTF needs the TypeInfo of the reducer batch. + ret = validatePTFOperator((PTFOperator) op, reducerBatchTypeInfos); break; default: setOperatorNotSupported(op); @@ -2287,7 +2308,7 @@ private boolean containsLeadLag(List exprNodeDescList) { return false; } - private boolean validatePTFOperator(PTFOperator op) { + private boolean validatePTFOperator(PTFOperator op, TypeInfo[] reducerBatchTypeInfos) { if (!isPtfVectorizationEnabled) { setNodeIssue("Vectorization of PTF is not enabled (" + @@ -2323,7 +2344,8 @@ private boolean validatePTFOperator(PTFOperator op) { VectorPTFDesc vectorPTFDesc = null; try { - vectorPTFDesc = createVectorPTFDesc(op, ptfDesc); + vectorPTFDesc = createVectorPTFDesc( + op, ptfDesc, reducerBatchTypeInfos, vectorizedPTFMaxMemoryBufferingBatchCount); } catch (HiveException e) { setOperatorIssue("exception: " + VectorizationContext.getStackTraceAsSingleLine(e)); return false; @@ -3835,7 +3857,8 @@ private static void fillInPTFEvaluators( * VectorizationContext to lookup column names, etc. */ private static VectorPTFDesc createVectorPTFDesc(Operator ptfOp, - PTFDesc ptfDesc) throws HiveException { + PTFDesc ptfDesc, TypeInfo[] reducerBatchTypeInfos, + int vectorizedPTFMaxMemoryBufferingBatchCount) throws HiveException { PartitionedTableFunctionDef funcDef = ptfDesc.getFuncDef(); @@ -3849,6 +3872,8 @@ private static VectorPTFDesc createVectorPTFDesc(Operator keyInputColumns, ArrayList nonKeyInputColumns) { - final int outputSize = outputColumnMap.length; + final int outputSize = outputColumnProjectionMap.length; final int orderKeyCount = orderColumnMap.length; final int partitionKeyCount = (isPartitionOrderBy ? partitionColumnMap.length : 0); for (int i = evaluatorCount; i < outputSize; i++) { - final int nonEvalColumnNum = outputColumnMap[i]; + final int nonEvalColumnNum = outputColumnProjectionMap[i]; boolean isKey = false; for (int o = 0; o < orderKeyCount; o++) { if (nonEvalColumnNum == orderColumnMap[o]) { @@ -3957,7 +3989,8 @@ private static void determineKeyAndNonKeyInputColumnMap(int[] outputColumnMap, * execution. 
@@ -3835,7 +3857,8 @@ private static void fillInPTFEvaluators(
    * VectorizationContext to lookup column names, etc.
    */
   private static VectorPTFDesc createVectorPTFDesc(Operator<? extends OperatorDesc> ptfOp,
-      PTFDesc ptfDesc) throws HiveException {
+      PTFDesc ptfDesc, TypeInfo[] reducerBatchTypeInfos,
+      int vectorizedPTFMaxMemoryBufferingBatchCount) throws HiveException {
 
     PartitionedTableFunctionDef funcDef = ptfDesc.getFuncDef();
 
@@ -3849,6 +3872,8 @@ private static VectorPTFDesc createVectorPTFDesc(Operator keyInputColumns, ArrayList nonKeyInputColumns) {
-    final int outputSize = outputColumnMap.length;
+    final int outputSize = outputColumnProjectionMap.length;
     final int orderKeyCount = orderColumnMap.length;
     final int partitionKeyCount = (isPartitionOrderBy ? partitionColumnMap.length : 0);
     for (int i = evaluatorCount; i < outputSize; i++) {
-      final int nonEvalColumnNum = outputColumnMap[i];
+      final int nonEvalColumnNum = outputColumnProjectionMap[i];
       boolean isKey = false;
       for (int o = 0; o < orderKeyCount; o++) {
         if (nonEvalColumnNum == orderColumnMap[o]) {
@@ -3957,7 +3989,8 @@ private static void determineKeyAndNonKeyInputColumnMap(int[] outputColumnMap,
    * execution.
    */
   private static VectorPTFInfo createVectorPTFInfo(Operator<? extends OperatorDesc> ptfOp,
-      PTFDesc ptfDesc, VectorizationContext vContext) throws HiveException {
+      PTFDesc ptfDesc, VectorizationContext vContext)
+      throws HiveException {
 
     PartitionedTableFunctionDef funcDef = ptfDesc.getFuncDef();
 
@@ -3978,17 +4011,21 @@ private static VectorPTFInfo createVectorPTFInfo(Operator keyInputColumns = new ArrayList();
     ArrayList<Integer> nonKeyInputColumns = new ArrayList<Integer>();
-    determineKeyAndNonKeyInputColumnMap(outputColumnMap, isPartitionOrderBy, orderColumnMap,
+    determineKeyAndNonKeyInputColumnMap(outputColumnProjectionMap, isPartitionOrderBy, orderColumnMap,
         partitionColumnMap, evaluatorCount, keyInputColumns, nonKeyInputColumns);
     int[] keyInputColumnMap = ArrayUtils.toPrimitive(keyInputColumns.toArray(new Integer[0]));
     int[] nonKeyInputColumnMap = ArrayUtils.toPrimitive(nonKeyInputColumns.toArray(new Integer[0]));
@@ -4075,7 +4112,7 @@ private static VectorPTFInfo createVectorPTFInfo(Operator streamingColumns = new ArrayList();
+    ArrayList<Integer> streamingEvaluatorNums = new ArrayList<Integer>();
     for (int i = 0; i < evaluatorCount; i++) {
       final VectorPTFEvaluatorBase evaluator = evaluators[i];
       if (evaluator.streamsResult()) {
-        streamingColumns.add(evaluator.getOutputColumnNum());
+        streamingEvaluatorNums.add(i);
       }
     }
-    return ArrayUtils.toPrimitive(streamingColumns.toArray(new Integer[0]));
+    return ArrayUtils.toPrimitive(streamingEvaluatorNums.toArray(new Integer[0]));
+  }
+
+  public TypeInfo[] getReducerBatchTypeInfos() {
+    return reducerBatchTypeInfos;
+  }
+
+  public void setReducerBatchTypeInfos(TypeInfo[] reducerBatchTypeInfos) {
+    this.reducerBatchTypeInfos = reducerBatchTypeInfos;
   }
 
   public boolean getIsPartitionOrderBy() {
@@ -357,4 +372,14 @@ public void setVectorPTFInfo(VectorPTFInfo vectorPTFInfo) {
   public VectorPTFInfo getVectorPTFInfo() {
     return vectorPTFInfo;
   }
+
+  public void setVectorizedPTFMaxMemoryBufferingBatchCount(
+      int vectorizedPTFMaxMemoryBufferingBatchCount) {
+    this.vectorizedPTFMaxMemoryBufferingBatchCount = vectorizedPTFMaxMemoryBufferingBatchCount;
+  }
+
+  public int getVectorizedPTFMaxMemoryBufferingBatchCount() {
+    return vectorizedPTFMaxMemoryBufferingBatchCount;
+  }
+
 }
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowBytesContainer.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowBytesContainer.java
new file mode 100644
index 0000000..d9e2dbd
--- /dev/null
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowBytesContainer.java
@@ -0,0 +1,76 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import java.util.Random; + +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.RandomByteArrayStream; +import org.apache.hadoop.hive.ql.exec.vector.rowbytescontainer.VectorRowBytesContainer; +import org.apache.hadoop.hive.serde2.ByteStream.Output; +import org.junit.Test; + +import static org.junit.Assert.*; + +public class TestVectorRowBytesContainer { + + public void doFillReplay(Random random, int maxCount) throws Exception { + + RandomByteArrayStream randomByteArrayStream = new RandomByteArrayStream(random); + VectorRowBytesContainer vectorMapJoinRowBytesContainer = + new VectorRowBytesContainer(null); + + int count = Math.min(maxCount, random.nextInt(500)); + for (int i = 0; i < count; i++) { + byte[] bytes = randomByteArrayStream.next(); + Output output = vectorMapJoinRowBytesContainer.getOuputForRowBytes(); + output.write(bytes); + vectorMapJoinRowBytesContainer.finishRow(); + } + vectorMapJoinRowBytesContainer.prepareForReading(); + + for (int i = 0; i < count; i++) { + if (!vectorMapJoinRowBytesContainer.readNext()) { + assertTrue(false); + } + byte[] readBytes = vectorMapJoinRowBytesContainer.currentBytes(); + int readOffset = vectorMapJoinRowBytesContainer.currentOffset(); + int readLength = vectorMapJoinRowBytesContainer.currentLength(); + byte[] expectedBytes = randomByteArrayStream.get(i); + if (readLength != expectedBytes.length) { + assertTrue(false); + } + for (int j = 0; j < readLength; j++) { + byte readByte = readBytes[readOffset + j]; + byte expectedByte = expectedBytes[j]; + if (readByte != expectedByte) { + assertTrue(false); + } + } + } + } + + @Test + public void testFillReplay() throws Exception { + Random random = new Random(47496); + + for (int i = 0; i < 10; i++) { + doFillReplay(random, 1 << i); + } + } +} \ No newline at end of file diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinRowBytesContainer.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinRowBytesContainer.java deleted file mode 100644 index afe4e70..0000000 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinRowBytesContainer.java +++ /dev/null @@ -1,75 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; - -import java.util.Random; - -import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinRowBytesContainer; -import org.apache.hadoop.hive.serde2.ByteStream.Output; -import org.junit.Test; - -import static org.junit.Assert.*; - -public class TestVectorMapJoinRowBytesContainer { - - public void doFillReplay(Random random, int maxCount) throws Exception { - - RandomByteArrayStream randomByteArrayStream = new RandomByteArrayStream(random); - VectorMapJoinRowBytesContainer vectorMapJoinRowBytesContainer = - new VectorMapJoinRowBytesContainer(null); - - int count = Math.min(maxCount, random.nextInt(500)); - for (int i = 0; i < count; i++) { - byte[] bytes = randomByteArrayStream.next(); - Output output = vectorMapJoinRowBytesContainer.getOuputForRowBytes(); - output.write(bytes); - vectorMapJoinRowBytesContainer.finishRow(); - } - vectorMapJoinRowBytesContainer.prepareForReading(); - - for (int i = 0; i < count; i++) { - if (!vectorMapJoinRowBytesContainer.readNext()) { - assertTrue(false); - } - byte[] readBytes = vectorMapJoinRowBytesContainer.currentBytes(); - int readOffset = vectorMapJoinRowBytesContainer.currentOffset(); - int readLength = vectorMapJoinRowBytesContainer.currentLength(); - byte[] expectedBytes = randomByteArrayStream.get(i); - if (readLength != expectedBytes.length) { - assertTrue(false); - } - for (int j = 0; j < readLength; j++) { - byte readByte = readBytes[readOffset + j]; - byte expectedByte = expectedBytes[j]; - if (readByte != expectedByte) { - assertTrue(false); - } - } - } - } - - @Test - public void testFillReplay() throws Exception { - Random random = new Random(47496); - - for (int i = 0; i < 10; i++) { - doFillReplay(random, 1 << i); - } - } -} \ No newline at end of file diff --git ql/src/test/queries/clientpositive/vector_ptf_part_simple.q ql/src/test/queries/clientpositive/vector_ptf_part_simple.q index bd3b3e4..fc9f9eb 100644 --- ql/src/test/queries/clientpositive/vector_ptf_part_simple.q +++ ql/src/test/queries/clientpositive/vector_ptf_part_simple.q @@ -502,3 +502,50 @@ from vector_ptf_part_simple_orc; select p_mfgr, p_name, p_retailprice, rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end order by p_name) as r from vector_ptf_part_simple_orc; + + +-- +-- Run some tests with these parameters that force spilling to disk. 
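+--
+-- Why these two values: with max.memory.buffering.batch.count=1 the vectorized PTF
+-- operator may hold only a single buffered row batch in memory, and with
+-- testing.reducer.batch.size=2 the reducer hands it two-row batches, so any partition
+-- group larger than two rows should overflow the buffer and take the spill-to-disk
+-- and read-back path instead of the purely in-memory one.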
+-- +set hive.vectorized.ptf.max.memory.buffering.batch.count=1; +set hive.vectorized.testing.reducer.batch.size=2; + +select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr) as rn, +rank() over(partition by p_mfgr) as r, +dense_rank() over(partition by p_mfgr) as dr, +first_value(p_retailprice) over(partition by p_mfgr) as fv, +last_value(p_retailprice) over(partition by p_mfgr) as lv, +count(p_retailprice) over(partition by p_mfgr) as c, +count(*) over(partition by p_mfgr) as cs +from vector_ptf_part_simple_orc; + +select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr order by p_name) as rn, +rank() over(partition by p_mfgr order by p_name) as r, +dense_rank() over(partition by p_mfgr order by p_name) as dr, +first_value(p_retailprice) over(partition by p_mfgr order by p_name) as fv, +last_value(p_retailprice) over(partition by p_mfgr order by p_name) as lv, +count(p_retailprice) over(partition by p_mfgr order by p_name) as c, +count(*) over(partition by p_mfgr order by p_name) as cs +from vector_ptf_part_simple_orc; + + +select p_mfgr, p_retailprice, +rank() over(partition by p_mfgr) as r +from vector_ptf_part_simple_orc; + +select p_mfgr, p_retailprice, +rank() over(partition by p_mfgr order by p_name) as r +from vector_ptf_part_simple_orc; + + +select p_mfgr, p_name, p_retailprice, +rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end order by p_name) as r +from vector_ptf_part_simple_orc; + + +select p_mfgr, p_name, p_retailprice, +rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end) as r +from vector_ptf_part_simple_orc; + diff --git ql/src/test/queries/clientpositive/vector_windowing_expressions.q ql/src/test/queries/clientpositive/vector_windowing_expressions.q index 7d8c5d5..6a37c4e 100644 --- ql/src/test/queries/clientpositive/vector_windowing_expressions.q +++ ql/src/test/queries/clientpositive/vector_windowing_expressions.q @@ -92,3 +92,38 @@ round(sum(p_retailprice) over w1 , 2) + 50.0 = round(sum(lag(p_retailprice,1,50. from part window w1 as (distribute by p_mfgr sort by p_retailprice) limit 11; + + +-- +-- Run some tests with these parameters that force spilling to disk. 
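+--
+-- Same spill-forcing settings as in vector_ptf_part_simple.q, applied to the windowing
+-- expressions below. A quick sanity check (a sketch; assumes the same session) is to
+-- prepend one of the queries with an explain and confirm the reducer-side PTF is still
+-- vectorized under these settings:
+--
+--   explain vectorization detail
+--   select p_mfgr, p_retailprice,
+--     rank() over (distribute by p_mfgr sort by p_retailprice) as r
+--   from part;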
+-- +set hive.vectorized.ptf.max.memory.buffering.batch.count=1; +set hive.vectorized.testing.reducer.batch.size=2; + +select p_mfgr, p_retailprice, p_size, +round(sum(p_retailprice) over w1 , 2) = round(sum(lag(p_retailprice,1,0.0)) over w1 + last_value(p_retailprice) over w1 , 2), +max(p_retailprice) over w1 - min(p_retailprice) over w1 = last_value(p_retailprice) over w1 - first_value(p_retailprice) over w1 +from part +window w1 as (distribute by p_mfgr sort by p_retailprice) +; + +select p_mfgr, p_retailprice, p_size, +rank() over (distribute by p_mfgr sort by p_retailprice) as r, +sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) as s2, +sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) -5 as s1 +from part +; + +select p_mfgr, avg(p_retailprice) over(partition by p_mfgr, p_type order by p_mfgr) from part; + +select p_mfgr, avg(p_retailprice) over(partition by p_mfgr order by p_type,p_mfgr rows between unbounded preceding and current row) from part; + +from (select sum(i) over (partition by ts order by i), s from over10k) tt insert overwrite table t1 select * insert overwrite table t2 select * ; +select * from t1 limit 3; +select * from t2 limit 3; + +select p_mfgr, p_retailprice, p_size, +round(sum(p_retailprice) over w1 , 2) + 50.0 = round(sum(lag(p_retailprice,1,50.0)) over w1 + (last_value(p_retailprice) over w1),2) +from part +window w1 as (distribute by p_mfgr sort by p_retailprice) +limit 11; diff --git ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out index 614cec3..5c978d7 100644 --- ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out +++ ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out @@ -5947,3 +5947,345 @@ Manufacturer#5 almond antique medium spring khaki 1611.66 2 Manufacturer#5 almond antique sky peru orange 1788.73 4 Manufacturer#5 almond aquamarine dodger light gainsboro 1018.1 5 Manufacturer#5 almond azure blanched chiffon midnight 1464.48 6 +PREHOOK: query: select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr) as rn, +rank() over(partition by p_mfgr) as r, +dense_rank() over(partition by p_mfgr) as dr, +first_value(p_retailprice) over(partition by p_mfgr) as fv, +last_value(p_retailprice) over(partition by p_mfgr) as lv, +count(p_retailprice) over(partition by p_mfgr) as c, +count(*) over(partition by p_mfgr) as cs +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr) as rn, +rank() over(partition by p_mfgr) as r, +dense_rank() over(partition by p_mfgr) as dr, +first_value(p_retailprice) over(partition by p_mfgr) as fv, +last_value(p_retailprice) over(partition by p_mfgr) as lv, +count(p_retailprice) over(partition by p_mfgr) as c, +count(*) over(partition by p_mfgr) as cs +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +p_mfgr p_name p_retailprice rn r dr fv lv c cs +Manufacturer#4 almond azure aquamarine papaya violet 1290.35 1 1 1 1290.35 1206.26 4 6 +Manufacturer#4 almond antique violet mint lemon 1375.42 2 1 1 1290.35 1206.26 4 6 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 3 1 1 1290.35 
1206.26 4 6 +Manufacturer#4 almond antique gainsboro frosted violet NULL 4 1 1 1290.35 1206.26 4 6 +Manufacturer#4 almond aquamarine yellow dodger mint 1844.92 5 1 1 1290.35 1206.26 4 6 +Manufacturer#4 almond aquamarine floral ivory bisque 1206.26 6 1 1 1290.35 1206.26 4 6 +Manufacturer#5 almond azure blanched chiffon midnight 1464.48 1 1 1 1464.48 1788.73 6 6 +Manufacturer#5 almond aquamarine dodger light gainsboro 1018.1 2 1 1 1464.48 1788.73 6 6 +Manufacturer#5 almond antique medium spring khaki 1611.66 3 1 1 1464.48 1788.73 6 6 +Manufacturer#5 almond antique blue firebrick mint 1789.69 4 1 1 1464.48 1788.73 6 6 +Manufacturer#5 almond antique medium spring khaki 1611.66 5 1 1 1464.48 1788.73 6 6 +Manufacturer#5 almond antique sky peru orange 1788.73 6 1 1 1464.48 1788.73 6 6 +Manufacturer#2 almond aquamarine rose maroon antique 900.66 1 1 1 900.66 1800.7 8 8 +Manufacturer#2 almond aquamarine rose maroon antique 1698.66 2 1 1 900.66 1800.7 8 8 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 3 1 1 900.66 1800.7 8 8 +Manufacturer#2 almond antique violet chocolate turquoise 1690.68 4 1 1 900.66 1800.7 8 8 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 5 1 1 900.66 1800.7 8 8 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.6 6 1 1 900.66 1800.7 8 8 +Manufacturer#2 almond aquamarine midnight light salmon 2031.98 7 1 1 900.66 1800.7 8 8 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 8 1 1 900.66 1800.7 8 8 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 1 1 1 1753.76 1632.66 11 12 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 2 1 1 1753.76 1632.66 11 12 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 3 1 1 1753.76 1632.66 11 12 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 4 1 1 1753.76 1632.66 11 12 +Manufacturer#1 almond aquamarine burnished black steel 1414.42 5 1 1 1753.76 1632.66 11 12 +Manufacturer#1 almond antique burnished rose metallic 1173.15 6 1 1 1753.76 1632.66 11 12 +Manufacturer#1 almond antique salmon chartreuse burlywood 1602.59 7 1 1 1753.76 1632.66 11 12 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 8 1 1 1753.76 1632.66 11 12 +Manufacturer#1 almond antique burnished rose metallic 1173.15 9 1 1 1753.76 1632.66 11 12 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 10 1 1 1753.76 1632.66 11 12 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 11 1 1 1753.76 1632.66 11 12 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 12 1 1 1753.76 1632.66 11 12 +Manufacturer#3 almond antique forest lavender goldenrod 590.27 1 1 1 590.27 99.68 7 8 +Manufacturer#3 almond antique metallic orange dim 55.39 2 1 1 590.27 99.68 7 8 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 3 1 1 590.27 99.68 7 8 +Manufacturer#3 almond antique olive coral navajo 1337.29 4 1 1 590.27 99.68 7 8 +Manufacturer#3 almond antique misty red olive 1922.98 5 1 1 590.27 99.68 7 8 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 6 1 1 590.27 99.68 7 8 +Manufacturer#3 almond antique forest lavender goldenrod NULL 7 1 1 590.27 99.68 7 8 +Manufacturer#3 almond antique chartreuse khaki white 99.68 8 1 1 590.27 99.68 7 8 +PREHOOK: query: select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr order by p_name) as rn, +rank() over(partition by p_mfgr order by p_name) as r, +dense_rank() over(partition by p_mfgr order by p_name) as dr, +first_value(p_retailprice) over(partition by 
p_mfgr order by p_name) as fv, +last_value(p_retailprice) over(partition by p_mfgr order by p_name) as lv, +count(p_retailprice) over(partition by p_mfgr order by p_name) as c, +count(*) over(partition by p_mfgr order by p_name) as cs +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr order by p_name) as rn, +rank() over(partition by p_mfgr order by p_name) as r, +dense_rank() over(partition by p_mfgr order by p_name) as dr, +first_value(p_retailprice) over(partition by p_mfgr order by p_name) as fv, +last_value(p_retailprice) over(partition by p_mfgr order by p_name) as lv, +count(p_retailprice) over(partition by p_mfgr order by p_name) as c, +count(*) over(partition by p_mfgr order by p_name) as cs +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +p_mfgr p_name p_retailprice rn r dr fv lv c cs +Manufacturer#1 almond antique burnished rose metallic 1173.15 1 1 1 1173.15 1173.15 2 2 +Manufacturer#1 almond antique burnished rose metallic 1173.15 2 1 1 1173.15 1173.15 2 2 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 3 3 2 1173.15 1753.76 6 6 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 4 3 2 1173.15 1753.76 6 6 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 5 3 2 1173.15 1753.76 6 6 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 6 3 2 1173.15 1753.76 6 6 +Manufacturer#1 almond antique salmon chartreuse burlywood 1602.59 7 7 3 1173.15 1602.59 7 7 +Manufacturer#1 almond aquamarine burnished black steel 1414.42 8 8 4 1173.15 1414.42 8 8 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 9 9 5 1173.15 1632.66 11 12 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 10 9 5 1173.15 1632.66 11 12 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 11 9 5 1173.15 1632.66 11 12 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 12 9 5 1173.15 1632.66 11 12 +Manufacturer#2 almond antique violet chocolate turquoise 1690.68 1 1 1 1690.68 1690.68 1 1 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 2 2 2 1690.68 1800.7 4 4 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 3 2 2 1690.68 1800.7 4 4 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 4 2 2 1690.68 1800.7 4 4 +Manufacturer#2 almond aquamarine midnight light salmon 2031.98 5 5 3 1690.68 2031.98 5 5 +Manufacturer#2 almond aquamarine rose maroon antique 900.66 6 6 4 1690.68 1698.66 7 7 +Manufacturer#2 almond aquamarine rose maroon antique 1698.66 7 6 4 1690.68 1698.66 7 7 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.6 8 8 5 1690.68 1000.6 8 8 +Manufacturer#3 almond antique chartreuse khaki white 99.68 1 1 1 99.68 99.68 1 1 +Manufacturer#3 almond antique forest lavender goldenrod 590.27 2 2 2 99.68 1190.27 4 5 +Manufacturer#3 almond antique forest lavender goldenrod NULL 3 2 2 99.68 1190.27 4 5 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 4 2 2 99.68 1190.27 4 5 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 5 2 2 99.68 1190.27 4 5 +Manufacturer#3 almond antique metallic orange dim 55.39 6 6 3 99.68 55.39 5 6 +Manufacturer#3 almond antique misty red olive 1922.98 7 7 4 99.68 1922.98 6 7 +Manufacturer#3 almond antique olive coral navajo 1337.29 8 8 5 
99.68 1337.29 7 8 +Manufacturer#4 almond antique gainsboro frosted violet NULL 1 1 1 NULL NULL 0 1 +Manufacturer#4 almond antique violet mint lemon 1375.42 2 2 2 NULL 1375.42 1 2 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 3 3 3 NULL 1206.26 2 4 +Manufacturer#4 almond aquamarine floral ivory bisque 1206.26 4 3 3 NULL 1206.26 2 4 +Manufacturer#4 almond aquamarine yellow dodger mint 1844.92 5 5 4 NULL 1844.92 3 5 +Manufacturer#4 almond azure aquamarine papaya violet 1290.35 6 6 5 NULL 1290.35 4 6 +Manufacturer#5 almond antique blue firebrick mint 1789.69 1 1 1 1789.69 1789.69 1 1 +Manufacturer#5 almond antique medium spring khaki 1611.66 2 2 2 1789.69 1611.66 3 3 +Manufacturer#5 almond antique medium spring khaki 1611.66 3 2 2 1789.69 1611.66 3 3 +Manufacturer#5 almond antique sky peru orange 1788.73 4 4 3 1789.69 1788.73 4 4 +Manufacturer#5 almond aquamarine dodger light gainsboro 1018.1 5 5 4 1789.69 1018.1 5 5 +Manufacturer#5 almond azure blanched chiffon midnight 1464.48 6 6 5 1789.69 1464.48 6 6 +PREHOOK: query: select p_mfgr, p_retailprice, +rank() over(partition by p_mfgr) as r +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_retailprice, +rank() over(partition by p_mfgr) as r +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +p_mfgr p_retailprice r +Manufacturer#4 1290.35 1 +Manufacturer#4 1375.42 1 +Manufacturer#4 NULL 1 +Manufacturer#4 NULL 1 +Manufacturer#4 1844.92 1 +Manufacturer#4 1206.26 1 +Manufacturer#5 1464.48 1 +Manufacturer#5 1018.1 1 +Manufacturer#5 1611.66 1 +Manufacturer#5 1789.69 1 +Manufacturer#5 1611.66 1 +Manufacturer#5 1788.73 1 +Manufacturer#2 900.66 1 +Manufacturer#2 1698.66 1 +Manufacturer#2 1800.7 1 +Manufacturer#2 1690.68 1 +Manufacturer#2 1800.7 1 +Manufacturer#2 1000.6 1 +Manufacturer#2 2031.98 1 +Manufacturer#2 1800.7 1 +Manufacturer#1 1753.76 1 +Manufacturer#1 1632.66 1 +Manufacturer#1 1632.66 1 +Manufacturer#1 1753.76 1 +Manufacturer#1 1414.42 1 +Manufacturer#1 1173.15 1 +Manufacturer#1 1602.59 1 +Manufacturer#1 1753.76 1 +Manufacturer#1 1173.15 1 +Manufacturer#1 1753.76 1 +Manufacturer#1 NULL 1 +Manufacturer#1 1632.66 1 +Manufacturer#3 590.27 1 +Manufacturer#3 55.39 1 +Manufacturer#3 1190.27 1 +Manufacturer#3 1337.29 1 +Manufacturer#3 1922.98 1 +Manufacturer#3 1190.27 1 +Manufacturer#3 NULL 1 +Manufacturer#3 99.68 1 +PREHOOK: query: select p_mfgr, p_retailprice, +rank() over(partition by p_mfgr order by p_name) as r +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_retailprice, +rank() over(partition by p_mfgr order by p_name) as r +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +p_mfgr p_retailprice r +Manufacturer#1 1173.15 1 +Manufacturer#1 1173.15 1 +Manufacturer#1 1753.76 3 +Manufacturer#1 1753.76 3 +Manufacturer#1 1753.76 3 +Manufacturer#1 1753.76 3 +Manufacturer#1 1602.59 7 +Manufacturer#1 1414.42 8 +Manufacturer#1 1632.66 9 +Manufacturer#1 NULL 9 +Manufacturer#1 1632.66 9 +Manufacturer#1 1632.66 9 +Manufacturer#2 1690.68 1 +Manufacturer#2 1800.7 2 +Manufacturer#2 1800.7 2 +Manufacturer#2 1800.7 2 +Manufacturer#2 2031.98 5 +Manufacturer#2 900.66 6 +Manufacturer#2 1698.66 6 +Manufacturer#2 
1000.6 8 +Manufacturer#3 99.68 1 +Manufacturer#3 590.27 2 +Manufacturer#3 NULL 2 +Manufacturer#3 1190.27 2 +Manufacturer#3 1190.27 2 +Manufacturer#3 55.39 6 +Manufacturer#3 1922.98 7 +Manufacturer#3 1337.29 8 +Manufacturer#4 NULL 1 +Manufacturer#4 1375.42 2 +Manufacturer#4 NULL 3 +Manufacturer#4 1206.26 3 +Manufacturer#4 1844.92 5 +Manufacturer#4 1290.35 6 +Manufacturer#5 1789.69 1 +Manufacturer#5 1611.66 2 +Manufacturer#5 1611.66 2 +Manufacturer#5 1788.73 4 +Manufacturer#5 1018.1 5 +Manufacturer#5 1464.48 6 +PREHOOK: query: select p_mfgr, p_name, p_retailprice, +rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end order by p_name) as r +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_retailprice, +rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end order by p_name) as r +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +p_mfgr p_name p_retailprice r +Manufacturer#1 almond antique burnished rose metallic 1173.15 1 +Manufacturer#1 almond antique burnished rose metallic 1173.15 1 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 3 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 3 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 3 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 3 +Manufacturer#1 almond antique salmon chartreuse burlywood 1602.59 7 +Manufacturer#1 almond aquamarine burnished black steel 1414.42 8 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 9 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 9 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 9 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 9 +Manufacturer#2 almond antique violet chocolate turquoise 1690.68 1 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 2 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 2 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 2 +Manufacturer#2 almond aquamarine midnight light salmon 2031.98 5 +Manufacturer#2 almond aquamarine rose maroon antique 900.66 6 +Manufacturer#2 almond aquamarine rose maroon antique 1698.66 6 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.6 8 +Manufacturer#3 almond antique chartreuse khaki white 99.68 1 +Manufacturer#3 almond antique forest lavender goldenrod 590.27 2 +Manufacturer#3 almond antique forest lavender goldenrod NULL 2 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 2 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 2 +Manufacturer#3 almond antique metallic orange dim 55.39 6 +Manufacturer#3 almond antique misty red olive 1922.98 7 +Manufacturer#3 almond antique olive coral navajo 1337.29 8 +Manufacturer#4 almond antique gainsboro frosted violet NULL 1 +Manufacturer#4 almond antique violet mint lemon 1375.42 2 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 3 +Manufacturer#4 almond aquamarine floral ivory bisque 1206.26 3 +Manufacturer#4 almond aquamarine yellow dodger mint 1844.92 5 +Manufacturer#4 almond azure aquamarine papaya violet 1290.35 6 +Manufacturer#5 almond antique blue firebrick mint 1789.69 1 +Manufacturer#5 almond antique medium spring khaki 1611.66 2 +Manufacturer#5 almond 
antique medium spring khaki 1611.66 2 +Manufacturer#5 almond antique sky peru orange 1788.73 4 +Manufacturer#5 almond aquamarine dodger light gainsboro 1018.1 5 +Manufacturer#5 almond azure blanched chiffon midnight 1464.48 6 +PREHOOK: query: select p_mfgr, p_name, p_retailprice, +rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end) as r +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_retailprice, +rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end) as r +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +p_mfgr p_name p_retailprice r +Manufacturer#1 almond aquamarine burnished black steel 1414.42 1 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 1 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 1 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 1 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 1 +Manufacturer#1 almond antique burnished rose metallic 1173.15 1 +Manufacturer#1 almond antique salmon chartreuse burlywood 1602.59 1 +Manufacturer#1 almond antique burnished rose metallic 1173.15 1 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 1 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 1 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 1 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 1 +Manufacturer#2 almond aquamarine rose maroon antique 900.66 1 +Manufacturer#2 almond aquamarine rose maroon antique 1698.66 1 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 1 +Manufacturer#2 almond antique violet chocolate turquoise 1690.68 1 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 1 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 1 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.6 1 +Manufacturer#2 almond aquamarine midnight light salmon 2031.98 1 +Manufacturer#4 almond antique violet mint lemon 1375.42 1 +Manufacturer#4 almond antique gainsboro frosted violet NULL 1 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 1 +Manufacturer#4 almond aquamarine floral ivory bisque 1206.26 1 +Manufacturer#4 almond azure aquamarine papaya violet 1290.35 1 +Manufacturer#4 almond aquamarine yellow dodger mint 1844.92 1 +Manufacturer#3 almond antique misty red olive 1922.98 1 +Manufacturer#3 almond antique chartreuse khaki white 99.68 1 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 1 +Manufacturer#3 almond antique metallic orange dim 55.39 1 +Manufacturer#3 almond antique olive coral navajo 1337.29 1 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 1 +Manufacturer#3 almond antique forest lavender goldenrod 590.27 1 +Manufacturer#3 almond antique forest lavender goldenrod NULL 1 +Manufacturer#5 almond antique medium spring khaki 1611.66 1 +Manufacturer#5 almond aquamarine dodger light gainsboro 1018.1 1 +Manufacturer#5 almond antique sky peru orange 1788.73 1 +Manufacturer#5 almond antique medium spring khaki 1611.66 1 +Manufacturer#5 almond azure blanched chiffon midnight 1464.48 1 +Manufacturer#5 almond antique blue firebrick mint 1789.69 1 diff --git ql/src/test/results/clientpositive/llap/vector_windowing_expressions.q.out 
ql/src/test/results/clientpositive/llap/vector_windowing_expressions.q.out new file mode 100644 index 0000000..a734e22 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_windowing_expressions.q.out @@ -0,0 +1,2245 @@ +PREHOOK: query: drop table over10k +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table over10k +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table over10k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) + row format delimited + fields terminated by '|' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over10k +POSTHOOK: query: create table over10k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) + row format delimited + fields terminated by '|' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over10k +PREHOOK: query: load data local inpath '../../data/files/over10k' into table over10k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@over10k +POSTHOOK: query: load data local inpath '../../data/files/over10k' into table over10k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@over10k +PREHOOK: query: explain vectorization detail +select p_mfgr, p_retailprice, p_size, +round(sum(p_retailprice) over w1 , 2) = round(sum(lag(p_retailprice,1,0.0)) over w1 + last_value(p_retailprice) over w1 , 2), +max(p_retailprice) over w1 - min(p_retailprice) over w1 = last_value(p_retailprice) over w1 - first_value(p_retailprice) over w1 +from part +window w1 as (distribute by p_mfgr sort by p_retailprice) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_retailprice, p_size, +round(sum(p_retailprice) over w1 , 2) = round(sum(lag(p_retailprice,1,0.0)) over w1 + last_value(p_retailprice) over w1 , 2), +max(p_retailprice) over w1 - min(p_retailprice) over w1 = last_value(p_retailprice) over w1 - first_value(p_retailprice) over w1 +from part +window w1 as (distribute by p_mfgr sort by p_retailprice) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 2860 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_retailprice (type: double) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: [2, 7] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumns: [2] + valueColumns: [5] + Statistics: Num rows: 26 Data size: 2860 Basic stats: COMPLETE Column stats: COMPLETE + value 
expressions: p_size (type: int) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: lead and lag function not supported in argument expression of aggregation function sum + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int), KEY.reducesinkkey1 (type: double) + outputColumnNames: _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 9828 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col2: string, _col5: int, _col7: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col7 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col7 + name: sum + window function: GenericUDAFSumDouble + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: sum_window_1 + arguments: lag(...) + name: sum + window function: GenericUDAFSumDouble + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: last_value_window_2 + arguments: _col7 + name: last_value + window function: GenericUDAFLastValueEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: max_window_3 + arguments: _col7 + name: max + window function: GenericUDAFMaxEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: min_window_4 + arguments: _col7 + name: min + window function: GenericUDAFMinEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: first_value_window_5 + arguments: _col7 + name: first_value + window function: GenericUDAFFirstValueEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + Lead/Lag information: lag(...) 
(type: double) + Statistics: Num rows: 26 Data size: 9828 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col7 (type: double), _col5 (type: int), (round(sum_window_0, 2) = round((sum_window_1 + last_value_window_2), 2)) (type: boolean), ((max_window_3 - min_window_4) = (last_value_window_2 - first_value_window_5)) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 26 Data size: 3068 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3068 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_retailprice, p_size, +round(sum(p_retailprice) over w1 , 2) = round(sum(lag(p_retailprice,1,0.0)) over w1 + last_value(p_retailprice) over w1 , 2), +max(p_retailprice) over w1 - min(p_retailprice) over w1 = last_value(p_retailprice) over w1 - first_value(p_retailprice) over w1 +from part +window w1 as (distribute by p_mfgr sort by p_retailprice) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_retailprice, p_size, +round(sum(p_retailprice) over w1 , 2) = round(sum(lag(p_retailprice,1,0.0)) over w1 + last_value(p_retailprice) over w1 , 2), +max(p_retailprice) over w1 - min(p_retailprice) over w1 = last_value(p_retailprice) over w1 - first_value(p_retailprice) over w1 +from part +window w1 as (distribute by p_mfgr sort by p_retailprice) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_retailprice p_size _c3 _c4 +Manufacturer#1 1173.15 2 true true +Manufacturer#1 1173.15 2 true true +Manufacturer#1 1414.42 28 true true +Manufacturer#1 1602.59 6 true true +Manufacturer#1 1632.66 42 true true +Manufacturer#1 1753.76 34 true true +Manufacturer#2 1690.68 14 true true +Manufacturer#2 1698.66 25 true true +Manufacturer#2 1701.6 18 true true +Manufacturer#2 1800.7 40 true true +Manufacturer#2 2031.98 2 true true +Manufacturer#3 1190.27 14 true true +Manufacturer#3 1337.29 45 true true +Manufacturer#3 1410.39 19 true true +Manufacturer#3 1671.68 17 true true +Manufacturer#3 1922.98 1 true true +Manufacturer#4 1206.26 27 true true +Manufacturer#4 1290.35 12 true true +Manufacturer#4 1375.42 39 true true +Manufacturer#4 1620.67 10 true true +Manufacturer#4 1844.92 7 true true +Manufacturer#5 1018.1 46 true true +Manufacturer#5 1464.48 23 true true +Manufacturer#5 1611.66 6 true true +Manufacturer#5 1788.73 2 true true +Manufacturer#5 1789.69 31 true true +PREHOOK: query: explain vectorization detail +select p_mfgr, p_retailprice, p_size, +rank() over (distribute by p_mfgr sort by p_retailprice) as r, +sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) as s2, +sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) -5 as s1 +from part +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_retailprice, p_size, +rank() over (distribute by p_mfgr sort by p_retailprice) as r, +sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between 
unbounded preceding and current row) as s2, +sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) -5 as s1 +from part +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 2860 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_retailprice (type: double) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: [2, 7] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumns: [2] + valueColumns: [5] + Statistics: Num rows: 26 Data size: 2860 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: p_size (type: int) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int), KEY.reducesinkkey1 (type: double) + outputColumnNames: _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 9828 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col2: string, _col5: int, _col7: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col7 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col7 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: sum_window_1 + arguments: _col7 + name: sum + window function: GenericUDAFSumDouble + window frame: ROWS PRECEDING(MAX)~CURRENT + Statistics: Num rows: 26 Data size: 9828 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col7 (type: 
double), _col5 (type: int), rank_window_0 (type: int), sum_window_1 (type: double), (sum_window_1 - 5.0) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3380 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3380 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_retailprice, p_size, +rank() over (distribute by p_mfgr sort by p_retailprice) as r, +sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) as s2, +sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) -5 as s1 +from part +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_retailprice, p_size, +rank() over (distribute by p_mfgr sort by p_retailprice) as r, +sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) as s2, +sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) -5 as s1 +from part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_retailprice p_size r s2 s1 +Manufacturer#1 1173.15 2 1 1173.15 1168.15 +Manufacturer#1 1173.15 2 1 2346.3 2341.3 +Manufacturer#1 1414.42 28 3 3760.7200000000003 3755.7200000000003 +Manufacturer#1 1602.59 6 4 5363.31 5358.31 +Manufacturer#1 1632.66 42 5 6995.97 6990.97 +Manufacturer#1 1753.76 34 6 8749.73 8744.73 +Manufacturer#2 1690.68 14 1 1690.68 1685.68 +Manufacturer#2 1698.66 25 2 3389.34 3384.34 +Manufacturer#2 1701.6 18 3 5090.9400000000005 5085.9400000000005 +Manufacturer#2 1800.7 40 4 6891.64 6886.64 +Manufacturer#2 2031.98 2 5 8923.62 8918.62 +Manufacturer#3 1190.27 14 1 1190.27 1185.27 +Manufacturer#3 1337.29 45 2 2527.56 2522.56 +Manufacturer#3 1410.39 19 3 3937.95 3932.95 +Manufacturer#3 1671.68 17 4 5609.63 5604.63 +Manufacturer#3 1922.98 1 5 7532.610000000001 7527.610000000001 +Manufacturer#4 1206.26 27 1 1206.26 1201.26 +Manufacturer#4 1290.35 12 2 2496.6099999999997 2491.6099999999997 +Manufacturer#4 1375.42 39 3 3872.0299999999997 3867.0299999999997 +Manufacturer#4 1620.67 10 4 5492.7 5487.7 +Manufacturer#4 1844.92 7 5 7337.62 7332.62 +Manufacturer#5 1018.1 46 1 1018.1 1013.1 +Manufacturer#5 1464.48 23 2 2482.58 2477.58 +Manufacturer#5 1611.66 6 3 4094.24 4089.24 +Manufacturer#5 1788.73 2 4 5882.969999999999 5877.969999999999 +Manufacturer#5 1789.69 31 5 7672.66 7667.66 +PREHOOK: query: explain vectorization detail +select s, si, f, si - lead(f, 3) over (partition by t order by bo,s,si,f desc) from over10k limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select s, si, f, si - lead(f, 3) over (partition by t order by bo,s,si,f desc) from over10k limit 100 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked 
pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Reduce Output Operator + key expressions: t (type: tinyint), bo (type: boolean), s (type: string), si (type: smallint), f (type: float) + sort order: ++++- + Map-reduce partition columns: t (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: [0, 6, 7, 1, 4] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumns: [0] + valueColumns: [] + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [0, 1, 4, 6, 7] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2), bin:binary + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: lead not in supported functions [avg, count, dense_rank, first_value, last_value, max, min, rank, row_number, sum] + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey3 (type: smallint), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey1 (type: boolean), KEY.reducesinkkey2 (type: string) + outputColumnNames: _col0, _col1, _col4, _col6, _col7 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: tinyint, _col1: smallint, _col4: float, _col6: boolean, _col7: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col6 ASC NULLS FIRST, _col7 ASC NULLS FIRST, _col1 ASC NULLS FIRST, _col4 DESC NULLS LAST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: lead_window_0 + arguments: _col4, 3 + name: lead + window function: GenericUDAFLeadEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col7 (type: string), _col1 (type: smallint), _col4 (type: float), (UDFToFloat(_col1) - lead_window_0) (type: float) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num 
rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + +PREHOOK: query: select s, si, f, si - lead(f, 3) over (partition by t order by bo,s,si,f desc) from over10k limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, si, f, si - lead(f, 3) over (partition by t order by bo,s,si,f desc) from over10k limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +s si f _c3 +alice allen 400 76.31 337.23 +alice davidson 384 71.97 357.79 +alice king 455 2.48 395.93 +alice king 458 62.77 384.16998 +alice xylophone 485 26.21 464.05 +bob falkner 260 59.07 242.4 +bob ichabod 454 73.83 381.7 +bob polk 264 20.95 257.17 +bob underhill 454 17.6 424.94 +bob underhill 465 72.3 453.17 +bob van buren 433 6.83 398.4 +calvin ichabod 431 29.06 334.22 +david garcia 485 11.83 421.51 +ethan steinbeck 298 34.6 288.14 +fred ellison 376 96.78 330.76 +holly steinbeck 384 63.49 293.7 +holly underhill 318 9.86 269.91 +irene ellison 458 45.24 365.29 +irene underhill 307 90.3 244.19 +jessica johnson 494 48.09 490.18 +jessica king 459 92.71 452.2 +jessica white 284 62.81 209.08 +luke garcia 311 3.82 267.27 +luke young 451 6.8 429.0 +mike king 275 74.92 211.81 +oscar garcia 362 43.73 340.66 +priscilla laertes 316 22.0 296.06 +priscilla quirinius 423 63.19 362.72 +priscilla zipper 485 21.34 400.61 +quinn ellison 266 19.94 209.95 +quinn polk 507 60.28 447.66 +sarah robinson 320 84.39 309.74 +tom polk 346 56.05 320.33 +ulysses ellison 381 59.34 358.66 +ulysses quirinius 303 10.26 259.6 +ulysses robinson 313 25.67 269.31 +ulysses steinbeck 333 22.34 270.61 +victor allen 337 43.4 311.5 +victor hernandez 447 43.69 375.22 +victor xylophone 438 62.39 424.33 +wendy quirinius 279 25.5 250.25 +wendy robinson 275 71.78 262.88 +wendy xylophone 314 13.67 295.73 +xavier garcia 493 28.75 474.56 +zach thompson 386 12.12 377.63 +zach young 286 18.27 263.65 +alice falkner 280 18.44 227.7 +bob ellison 339 8.37 300.95 +bob johnson 374 22.35 326.49 +calvin white 280 52.3 198.32 +david carson 270 38.05 255.77 +david falkner 469 47.51 388.35 +david hernandez 408 81.68 339.27 +ethan underhill 339 14.23 256.26 +gabriella brown 498 80.65 413.25 +holly nixon 505 68.73 440.71 +holly polk 268 82.74 182.04001 +holly thompson 387 84.75 298.22 +irene young 458 64.29 401.8 +jessica miller 299 85.96 243.41 +katie ichabod 469 88.78 385.61 +luke ichabod 289 56.2 286.74 +luke king 337 55.59 274.88 +mike allen 465 83.39 383.03 +mike polk 500 2.26 427.74 +mike white 454 62.12 430.78 +mike xylophone 448 81.97 447.17 +nick nixon 335 72.26 240.78 +nick robinson 350 23.22 294.59 +oscar davidson 432 0.83 420.93 +oscar johnson 315 94.22 233.05 +oscar johnson 469 55.41 468.44 +oscar miller 324 11.07 265.19 +rachel davidson 507 81.95 468.78 +rachel thompson 344 0.56 246.12 +sarah miller 386 58.81 304.36 +sarah xylophone 275 38.22 177.48999 +sarah zipper 376 97.88 294.61 +tom hernandez 467 81.64 459.9 +tom hernandez 477 97.51 415.19 +tom steinbeck 414 81.39 361.87 +ulysses carson 343 7.1 314.22 +victor robinson 415 61.81 349.5 +victor thompson 344 52.13 NULL +xavier ovid 280 28.78 NULL +yuri xylophone 430 65.5 NULL +alice underhill 389 26.68 368.06 +alice 
underhill 446 6.49 444.21 +bob ovid 331 67.12 236.43 +bob van buren 406 20.94 383.32 +david falkner 406 1.79 374.34 +david miller 450 94.57 380.13 +ethan allen 380 22.68 375.6 +ethan king 395 31.66 361.51 +ethan nixon 475 69.87 431.39 +ethan polk 283 4.4 243.82 +fred allen 331 33.49 281.68 +fred king 511 43.61 457.22 +fred polk 261 39.18 248.73 +fred young 303 49.32 221.51001 +PREHOOK: query: explain vectorization detail +select s, i, i - lead(i, 3, 0) over (partition by si order by i,s) from over10k limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select s, i, i - lead(i, 3, 0) over (partition by si order by i,s) from over10k limit 100 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Reduce Output Operator + key expressions: si (type: smallint), i (type: int), s (type: string) + sort order: +++ + Map-reduce partition columns: si (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: [1, 2, 7] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumns: [1] + valueColumns: [] + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [1, 2, 7] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2), bin:binary + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: lead not in supported functions [avg, count, dense_rank, first_value, last_value, max, min, rank, row_number, sum] + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: string) + outputColumnNames: _col1, _col2, _col7 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: smallint, _col2: int, _col7: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS FIRST, _col7 ASC NULLS FIRST + partition by: _col1 + raw input shape: + window functions: + window 
function definition + alias: lead_window_0 + arguments: _col2, 3, 0 + name: lead + window function: GenericUDAFLeadEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col7 (type: string), _col2 (type: int), (_col2 - lead_window_0) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + +PREHOOK: query: select s, i, i - lead(i, 3, 0) over (partition by si order by i,s) from over10k limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, i, i - lead(i, 3, 0) over (partition by si order by i,s) from over10k limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +s i _c2 +wendy garcia 65540 -18 +ethan thompson 65543 -20 +zach nixon 65549 -31 +alice robinson 65558 -28 +wendy nixon 65563 -33 +victor robinson 65580 -19 +ethan falkner 65586 -18 +victor davidson 65596 -17 +xavier quirinius 65599 -14 +fred quirinius 65604 -11 +nick zipper 65613 -3 +xavier van buren 65613 -7 +victor johnson 65615 -12 +alice ovid 65616 -24 +xavier ovid 65620 -23 +ulysses white 65627 -24 +sarah white 65640 -13 +calvin young 65643 -25 +victor thompson 65651 -42 +calvin johnson 65653 -53 +irene polk 65668 -45 +zach underhill 65693 -38 +quinn hernandez 65706 -27 +rachel ovid 65713 -24 +gabriella falkner 65731 -7 +zach white 65733 -8 +fred hernandez 65737 -7 +rachel ellison 65738 -6 +oscar steinbeck 65741 -6 +alice ellison 65744 -8 +tom allen 65744 -19 +quinn quirinius 65747 -31 +victor hernandez 65752 -26 +holly xylophone 65763 -26 +david davidson 65778 65778 +ulysses young 65778 65778 +sarah brown 65789 65789 +xavier brown 65541 -16 +zach hernandez 65542 -18 +katie ichabod 65547 -19 +oscar young 65557 -15 +holly white 65560 -14 +priscilla laertes 65566 -9 +ethan king 65572 -6 +zach hernandez 65574 -10 +oscar thompson 65575 -13 +victor xylophone 65578 -16 +gabriella ellison 65584 -26 +nick quirinius 65588 -22 +holly robinson 65594 -18 +alice xylophone 65610 -16 +yuri brown 65610 -21 +sarah hernandez 65612 -26 +katie garcia 65626 -28 +jessica laertes 65631 -23 +ethan underhill 65638 -17 +irene young 65654 -37 +priscilla thompson 65654 -40 +luke quirinius 65655 -44 +david brown 65691 -20 +luke falkner 65694 -18 +priscilla miller 65699 -20 +rachel robinson 65711 -9 +ethan polk 65712 -10 +wendy brown 65719 -13 +mike underhill 65720 -18 +zach underhill 65722 -26 +nick zipper 65732 -20 +fred brown 65738 -18 +ulysses young 65748 -23 +nick davidson 65752 -19 +fred zipper 65756 -15 +yuri nixon 65771 -10 +zach hernandez 65771 -19 +zach zipper 65771 65771 +alice underhill 65781 65781 +oscar laertes 65790 65790 +sarah zipper 65546 -19 +bob falkner 65551 -17 +luke ovid 65551 -17 +katie allen 65565 -4 +nick falkner 65568 -5 +zach steinbeck 65568 -11 +oscar van buren 65569 -13 
+gabriella young 65573 -11 +jessica ichabod 65579 -24 +david garcia 65582 -24 +nick xylophone 65584 -27 +calvin johnson 65603 -14 +xavier zipper 65606 -50 +alice nixon 65611 -58 +jessica laertes 65617 -62 +fred king 65656 -61 +priscilla underhill 65669 -48 +priscilla zipper 65679 -45 +nick king 65717 -11 +sarah polk 65717 -17 +irene quirinius 65724 -28 +tom laertes 65728 -25 +yuri johnson 65734 -27 +PREHOOK: query: explain vectorization detail +select s, si, d, si - lag(d, 3) over (partition by b order by si,s,d) from over10k limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select s, si, d, si - lag(d, 3) over (partition by b order by si,s,d) from over10k limit 100 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 204 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Reduce Output Operator + key expressions: b (type: bigint), si (type: smallint), s (type: string), d (type: double) + sort order: ++++ + Map-reduce partition columns: b (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: [3, 1, 7, 5] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumns: [3] + valueColumns: [] + Statistics: Num rows: 1 Data size: 204 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [1, 3, 5, 7] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2), bin:binary + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: lag not in supported functions [avg, count, dense_rank, first_value, last_value, max, min, rank, row_number, sum] + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey0 (type: bigint), KEY.reducesinkkey3 (type: double), KEY.reducesinkkey2 (type: string) + outputColumnNames: _col1, _col3, _col5, _col7 + Statistics: Num rows: 1 Data size: 204 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: smallint, _col3: bigint, _col5: double, _col7: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: 
_col1 ASC NULLS FIRST, _col7 ASC NULLS FIRST, _col5 ASC NULLS FIRST + partition by: _col3 + raw input shape: + window functions: + window function definition + alias: lag_window_0 + arguments: _col5, 3 + name: lag + window function: GenericUDAFLagEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 1 Data size: 204 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col7 (type: string), _col1 (type: smallint), _col5 (type: double), (UDFToDouble(_col1) - lag_window_0) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 204 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 1 Data size: 204 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 204 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + +PREHOOK: query: select s, si, d, si - lag(d, 3) over (partition by b order by si,s,d) from over10k limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, si, d, si - lag(d, 3) over (partition by b order by si,s,d) from over10k limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +s si d _c3 +jessica ellison 262 30.41 NULL +david young 266 45.12 NULL +jessica steinbeck 274 2.15 NULL +david zipper 275 43.45 244.59 +zach nixon 283 15.95 237.88 +holly allen 285 24.37 282.85 +irene garcia 292 33.54 248.55 +ulysses xylophone 292 44.66 276.05 +irene van buren 309 35.81 284.63 +sarah miller 312 6.65 278.46 +victor garcia 312 39.14 267.34000000000003 +ethan ichabod 319 29.4 283.19 +wendy falkner 322 10.02 315.35 +oscar miller 324 25.95 284.86 +david ovid 332 28.34 302.6 +alice zipper 333 3.38 322.98 +yuri nixon 333 8.28 307.05 +ulysses nixon 335 18.48 306.66 +david ovid 336 9.36 332.62 +calvin falkner 337 17.63 328.72 +katie quirinius 349 11.3 330.52 +quinn miller 351 22.46 341.64 +victor xylophone 357 38.58 339.37 +ethan garcia 368 9.2 356.7 +nick steinbeck 395 37.54 372.54 +ulysses ichabod 415 47.61 376.42 +rachel thompson 416 37.99 406.8 +calvin young 418 47.22 380.46 +katie xylophone 425 32.59 377.39 +nick quirinius 429 19.63 391.01 +ethan ellison 453 47.92 405.78 +irene nixon 454 48.03 421.40999999999997 +bob steinbeck 462 47.04 442.37 +luke robinson 462 47.48 414.08 +gabriella steinbeck 467 9.35 418.97 +tom hernandez 467 29.36 419.96 +irene polk 485 14.26 437.52 +mike xylophone 494 36.92 484.65 +calvin allen 499 39.99 469.64 +quinn steinbeck 503 16.62 488.74 +calvin thompson 263 30.87 NULL +rachel quirinius 263 29.46 NULL +ulysses garcia 263 31.85 NULL +mike steinbeck 266 48.57 235.13 +rachel young 275 14.75 245.54 +tom king 278 31.11 246.15 +oscar robinson 283 30.35 234.43 +zach allen 284 1.88 269.25 +bob king 308 27.61 276.89 +ulysses allen 310 22.77 279.65 +fred nixon 317 0.48 315.12 +gabriella robinson 321 0.33 293.39 +bob johnson 325 9.61 302.23 +rachel davidson 335 2.34 334.52 +fred brown 337 5.8 336.67 +wendy ellison 350 20.25 340.39 +zach falkner 391 13.67 388.66 +katie xylophone 410 39.09 404.2 +holly king 413 3.56 392.75 +sarah van buren 417 7.81 403.33 +calvin 
van buren 430 36.01 390.90999999999997 +katie white 434 33.56 430.44 +oscar quirinius 454 7.03 446.19 +zach young 505 18.19 468.99 +gabriella robinson 506 12.8 472.44 +sarah xylophone 507 16.09 499.97 +rachel thompson 267 46.87 NULL +gabriella van buren 271 41.04 NULL +mike steinbeck 284 11.44 NULL +ethan ovid 293 2.08 246.13 +luke falkner 293 40.67 251.96 +irene nixon 321 24.35 309.56 +mike van buren 327 2.58 324.92 +ulysses robinson 329 26.64 288.33 +quinn laertes 332 10.71 307.65 +tom polk 346 34.03 343.42 +jessica johnson 352 45.71 325.36 +xavier davidson 354 33.9 343.29 +wendy nixon 364 29.42 329.97 +jessica quirinius 375 47.33 329.29 +xavier brown 376 26.17 342.1 +gabriella davidson 383 18.87 353.58 +jessica brown 388 34.09 340.67 +gabriella garcia 391 32.44 364.83 +ethan miller 396 49.07 377.13 +bob garcia 416 7.82 381.90999999999997 +priscilla hernandez 416 29.94 383.56 +holly nixon 419 17.81 369.93 +nick underhill 429 39.54 421.18 +xavier falkner 434 0.88 404.06 +luke robinson 461 44.02 443.19 +bob underhill 465 22.58 425.46 +ulysses king 483 37.98 482.12 +jessica miller 486 26.14 441.98 +bob ovid 493 9.7 470.42 +alice falkner 500 37.85 462.02 +quinn xylophone 267 49.8 NULL +gabriella thompson 268 17.15 NULL +calvin xylophone 275 49.32 NULL +gabriella zipper 279 30.41 229.2 +PREHOOK: query: explain vectorization detail +select s, lag(s, 3, 'fred') over (partition by f order by b) from over10k limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select s, lag(s, 3, 'fred') over (partition by f order by b) from over10k limit 100 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Reduce Output Operator + key expressions: f (type: float), b (type: bigint) + sort order: ++ + Map-reduce partition columns: f (type: float) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: [4, 3] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumns: [4] + valueColumns: [7] + Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE + value expressions: s (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [3, 4, 7] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2), bin:binary + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + 
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: lag not in supported functions [avg, count, dense_rank, first_value, last_value, max, min, rank, row_number, sum] + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey0 (type: float), VALUE._col5 (type: string) + outputColumnNames: _col3, _col4, _col7 + Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col3: bigint, _col4: float, _col7: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col3 ASC NULLS FIRST + partition by: _col4 + raw input shape: + window functions: + window function definition + alias: lag_window_0 + arguments: _col7, 3, 'fred' + name: lag + window function: GenericUDAFLagEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col7 (type: string), lag_window_0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + +PREHOOK: query: select s, lag(s, 3, 'fred') over (partition by f order by b) from over10k limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, lag(s, 3, 'fred') over (partition by f order by b) from over10k limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +s lag_window_0 +yuri thompson fred +bob ichabod fred +luke king fred +luke steinbeck fred +fred zipper fred +quinn miller fred +calvin van buren fred +holly steinbeck fred +david davidson fred +calvin thompson fred +calvin quirinius fred +david ovid fred +holly thompson fred +nick zipper fred +victor steinbeck fred +victor robinson fred +zach ovid fred +ulysses zipper fred +luke falkner fred +irene thompson fred +yuri johnson fred +ulysses falkner fred +gabriella robinson fred +alice robinson fred +priscilla xylophone fred +david laertes fred +mike underhill fred +victor van buren fred +holly falkner fred +priscilla falkner fred +ethan ovid fred +luke zipper fred +mike steinbeck fred +calvin white fred +alice quirinius fred +irene miller fred +wendy polk fred +nick young fred +yuri davidson fred +ethan ellison fred +zach hernandez fred +wendy miller fred +katie underhill fred +irene zipper fred +holly allen fred +quinn brown fred +calvin ovid fred +zach robinson fred +nick miller fred +mike allen fred +yuri van buren fred +priscilla young fred +zach miller fred +victor xylophone fred +sarah falkner fred +rachel ichabod fred +alice robinson fred +calvin ovid fred +calvin ovid fred +luke laertes fred +david hernandez fred +alice 
ovid fred +luke quirinius fred +oscar white fred +zach falkner fred +rachel thompson fred +priscilla king fred +xavier polk fred +wendy ichabod fred +rachel ovid fred +wendy allen fred +luke brown fred +mike brown fred +oscar ichabod fred +xavier garcia fred +yuri brown fred +bob xylophone fred +luke davidson fred +ethan quirinius fred +zach davidson fred +irene miller fred +wendy king fred +bob zipper fred +sarah thompson fred +bob carson fred +bob laertes fred +xavier allen fred +sarah robinson fred +david king fred +oscar davidson fred +victor hernandez fred +wendy polk fred +david ellison fred +ulysses johnson fred +jessica ovid fred +bob king fred +ulysses garcia fred +irene falkner fred +holly robinson fred +yuri white fred +PREHOOK: query: explain vectorization detail +select p_mfgr, avg(p_retailprice) over(partition by p_mfgr, p_type order by p_mfgr) from part +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, avg(p_retailprice) over(partition by p_mfgr, p_type order by p_mfgr) from part +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 5460 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_type (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string), p_type (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: [2, 4] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: [7] + Statistics: Num rows: 26 Data size: 5460 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: p_retailprice (type: double) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [2, 4, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col5:double + partitionColumnCount: 0 + scratchColumnTypeNames: double + Reduce Operator Tree: + Select 
Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col5 (type: double) + outputColumnNames: _col2, _col4, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] + Statistics: Num rows: 26 Data size: 12428 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col2: string, _col4: string, _col7: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS FIRST + partition by: _col2, _col4 + raw input shape: + window functions: + window function definition + alias: avg_window_0 + arguments: _col7 + name: avg + window function: GenericUDAFAverageEvaluatorDouble + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorDoubleAvg] + functionInputExpressions: [col 2] + functionNames: [avg] + keyInputColumns: [0, 1] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 0] + outputColumns: [3, 0, 1, 2] + outputTypes: [double, string, string, double] + partitionExpressions: [col 0, col 1] + streamingColumns: [] + Statistics: Num rows: 26 Data size: 12428 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), avg_window_0 (type: double) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 3] + Statistics: Num rows: 26 Data size: 2756 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 26 Data size: 2756 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, avg(p_retailprice) over(partition by p_mfgr, p_type order by p_mfgr) from part +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, avg(p_retailprice) over(partition by p_mfgr, p_type order by p_mfgr) from part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr avg_window_0 +Manufacturer#2 1800.7 +Manufacturer#4 1375.42 +Manufacturer#4 1620.67 +Manufacturer#4 1206.26 +Manufacturer#5 1788.73 +Manufacturer#1 1632.66 +Manufacturer#2 1690.68 +Manufacturer#2 1698.66 +Manufacturer#2 1701.6 +Manufacturer#3 1337.29 +Manufacturer#4 1844.92 +Manufacturer#4 1290.35 +Manufacturer#5 1018.1 +Manufacturer#5 1789.69 +Manufacturer#1 1753.76 +Manufacturer#1 1602.59 +Manufacturer#1 1173.15 +Manufacturer#1 1173.15 +Manufacturer#1 1414.42 +Manufacturer#2 2031.98 +Manufacturer#3 1922.98 +Manufacturer#3 1410.39 +Manufacturer#3 1190.27 +Manufacturer#5 1464.48 +Manufacturer#5 1611.66 +Manufacturer#3 1671.68 +PREHOOK: query: explain vectorization detail +select p_mfgr, avg(p_retailprice) over(partition by p_mfgr order by p_type,p_mfgr rows between unbounded preceding and current row) from part +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, avg(p_retailprice) over(partition by p_mfgr order by 
p_type,p_mfgr rows between unbounded preceding and current row) from part +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 5460 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_type (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: [2, 4] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumns: [2] + valueColumns: [7] + Statistics: Num rows: 26 Data size: 5460 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: p_retailprice (type: double) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [2, 4, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: avg UNBOUNDED end frame is not supported for ROWS window type + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col5 (type: double) + outputColumnNames: _col2, _col4, _col7 + Statistics: Num rows: 26 Data size: 12428 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col2: string, _col4: string, _col7: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col4 ASC NULLS FIRST, _col2 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: avg_window_0 + arguments: _col7 + name: avg + window function: GenericUDAFAverageEvaluatorDouble + window frame: ROWS PRECEDING(MAX)~CURRENT + Statistics: Num rows: 26 Data size: 12428 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), avg_window_0 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 2756 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 2756 Basic stats: COMPLETE Column stats: 
COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, avg(p_retailprice) over(partition by p_mfgr order by p_type,p_mfgr rows between unbounded preceding and current row) from part +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, avg(p_retailprice) over(partition by p_mfgr order by p_type,p_mfgr rows between unbounded preceding and current row) from part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr avg_window_0 +Manufacturer#1 1753.76 +Manufacturer#1 1693.21 +Manufacturer#1 1663.0033333333333 +Manufacturer#1 1540.54 +Manufacturer#1 1467.062 +Manufacturer#1 1458.2883333333332 +Manufacturer#2 1800.7 +Manufacturer#2 1745.69 +Manufacturer#2 1841.1200000000001 +Manufacturer#2 1805.505 +Manufacturer#2 1784.7240000000002 +Manufacturer#3 1922.98 +Manufacturer#3 1666.685 +Manufacturer#3 1668.3500000000001 +Manufacturer#3 1548.83 +Manufacturer#3 1506.522 +Manufacturer#4 1844.92 +Manufacturer#4 1610.17 +Manufacturer#4 1613.67 +Manufacturer#4 1511.8175 +Manufacturer#4 1467.5240000000001 +Manufacturer#5 1018.1 +Manufacturer#5 1241.29 +Manufacturer#5 1424.0900000000001 +Manufacturer#5 1515.25 +Manufacturer#5 1534.532 +PREHOOK: query: create table t1 (a1 int, b1 string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t1 +POSTHOOK: query: create table t1 (a1 int, b1 string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t1 +PREHOOK: query: create table t2 (a1 int, b1 string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t2 +POSTHOOK: query: create table t2 (a1 int, b1 string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t2 +PREHOOK: query: explain vectorization detail +from (select sum(i) over (partition by ts order by i), s from over10k) tt insert overwrite table t1 select * insert overwrite table t2 select * +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +from (select sum(i) over (partition by ts order by i), s from over10k) tt insert overwrite table t1 select * insert overwrite table t2 select * +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Reduce Output Operator + key expressions: ts (type: timestamp), i (type: int) + sort order: ++ + Map-reduce partition columns: ts (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: [8, 2] + 
native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumns: [8] + valueColumns: [7] + Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE + value expressions: s (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [2, 7, 8] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2), bin:binary + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:timestamp, KEY.reducesinkkey1:int, VALUE._col6:string + partitionColumnCount: 0 + scratchColumnTypeNames: bigint + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: int), VALUE._col6 (type: string), KEY.reducesinkkey0 (type: timestamp) + outputColumnNames: _col2, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2, 0] + Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col2: int, _col7: string, _col8: timestamp + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS FIRST + partition by: _col8 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumLong + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorLongSum] + functionInputExpressions: [col 1] + functionNames: [sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1] + outputColumns: [3, 1, 2, 0] + outputTypes: [bigint, int, string, timestamp] + partitionExpressions: [col 0] + streamingColumns: [] + Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: sum_window_0 (type: bigint), _col7 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3, 2] + Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3, 2] + Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + 
File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t1 + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3, 2] + Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t2 + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t1 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t2 + + Stage: Stage-5 + Stats-Aggr Operator + +PREHOOK: query: from (select sum(i) over (partition by ts order by i), s from over10k) tt insert overwrite table t1 select * insert overwrite table t2 select * +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +PREHOOK: Output: default@t1 +PREHOOK: Output: default@t2 +POSTHOOK: query: from (select sum(i) over (partition by ts order by i), s from over10k) tt insert overwrite table t1 select * insert overwrite table t2 select * +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +POSTHOOK: Output: default@t1 +POSTHOOK: Output: default@t2 +POSTHOOK: Lineage: t1.a1 SCRIPT [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), (over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), (over10k)over10k.FieldSchema(name:i, type:int, comment:null), (over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), (over10k)over10k.FieldSchema(name:f, type:float, comment:null), (over10k)over10k.FieldSchema(name:d, type:double, comment:null), (over10k)over10k.FieldSchema(name:bo, type:boolean, comment:null), (over10k)over10k.FieldSchema(name:s, type:string, comment:null), (over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), (over10k)over10k.FieldSchema(name:dec, type:decimal(4,2), comment:null), (over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), ] +POSTHOOK: Lineage: t1.b1 SIMPLE [(over10k)over10k.FieldSchema(name:s, type:string, comment:null), ] +POSTHOOK: Lineage: t2.a1 SCRIPT [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), (over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), (over10k)over10k.FieldSchema(name:i, type:int, comment:null), (over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), 
(over10k)over10k.FieldSchema(name:f, type:float, comment:null), (over10k)over10k.FieldSchema(name:d, type:double, comment:null), (over10k)over10k.FieldSchema(name:bo, type:boolean, comment:null), (over10k)over10k.FieldSchema(name:s, type:string, comment:null), (over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), (over10k)over10k.FieldSchema(name:dec, type:decimal(4,2), comment:null), (over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), ] +POSTHOOK: Lineage: t2.b1 SIMPLE [(over10k)over10k.FieldSchema(name:s, type:string, comment:null), ] +_col0 _col1 +PREHOOK: query: select * from t1 limit 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select * from t1 limit 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +t1.a1 t1.b1 +65542 rachel thompson +131088 oscar brown +262258 wendy steinbeck +PREHOOK: query: select * from t2 limit 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: select * from t2 limit 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +t2.a1 t2.b1 +65542 rachel thompson +131088 oscar brown +262258 wendy steinbeck +PREHOOK: query: explain vectorization detail +select p_mfgr, p_retailprice, p_size, +round(sum(p_retailprice) over w1 , 2) + 50.0 = round(sum(lag(p_retailprice,1,50.0)) over w1 + (last_value(p_retailprice) over w1),2) +from part +window w1 as (distribute by p_mfgr sort by p_retailprice) +limit 11 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_retailprice, p_size, +round(sum(p_retailprice) over w1 , 2) + 50.0 = round(sum(lag(p_retailprice,1,50.0)) over w1 + (last_value(p_retailprice) over w1),2) +from part +window w1 as (distribute by p_mfgr sort by p_retailprice) +limit 11 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 2860 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_retailprice (type: double) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: [2, 7] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumns: [2] + valueColumns: [5] + Statistics: Num rows: 26 Data size: 2860 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: p_size (type: int) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + 
rowBatchContext: + dataColumnCount: 9 + includeColumns: [2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: lead and lag function not supported in argument expression of aggregation function sum + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int), KEY.reducesinkkey1 (type: double) + outputColumnNames: _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 9828 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col2: string, _col5: int, _col7: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col7 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col7 + name: sum + window function: GenericUDAFSumDouble + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: sum_window_1 + arguments: lag(...) + name: sum + window function: GenericUDAFSumDouble + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: last_value_window_2 + arguments: _col7 + name: last_value + window function: GenericUDAFLastValueEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + Lead/Lag information: lag(...) (type: double) + Statistics: Num rows: 26 Data size: 9828 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col7 (type: double), _col5 (type: int), ((round(sum_window_0, 2) + 50.0) = round((sum_window_1 + last_value_window_2), 2)) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 26 Data size: 2964 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 11 + Statistics: Num rows: 11 Data size: 1254 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 11 Data size: 1254 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 11 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_retailprice, p_size, +round(sum(p_retailprice) over w1 , 2) + 50.0 = round(sum(lag(p_retailprice,1,50.0)) over w1 + (last_value(p_retailprice) over w1),2) +from part +window w1 as (distribute by p_mfgr sort by p_retailprice) +limit 11 +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_retailprice, p_size, +round(sum(p_retailprice) over w1 , 2) + 50.0 = round(sum(lag(p_retailprice,1,50.0)) over w1 + (last_value(p_retailprice) over w1),2) +from part +window w1 as (distribute by p_mfgr sort by p_retailprice) +limit 11 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_retailprice p_size _c3 +Manufacturer#1 1173.15 2 true +Manufacturer#1 1173.15 2 true 
+Manufacturer#1 1414.42 28 true +Manufacturer#1 1602.59 6 true +Manufacturer#1 1632.66 42 true +Manufacturer#1 1753.76 34 true +Manufacturer#2 1690.68 14 true +Manufacturer#2 1698.66 25 true +Manufacturer#2 1701.6 18 true +Manufacturer#2 1800.7 40 true +Manufacturer#2 2031.98 2 true +PREHOOK: query: select p_mfgr, p_retailprice, p_size, +round(sum(p_retailprice) over w1 , 2) = round(sum(lag(p_retailprice,1,0.0)) over w1 + last_value(p_retailprice) over w1 , 2), +max(p_retailprice) over w1 - min(p_retailprice) over w1 = last_value(p_retailprice) over w1 - first_value(p_retailprice) over w1 +from part +window w1 as (distribute by p_mfgr sort by p_retailprice) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_retailprice, p_size, +round(sum(p_retailprice) over w1 , 2) = round(sum(lag(p_retailprice,1,0.0)) over w1 + last_value(p_retailprice) over w1 , 2), +max(p_retailprice) over w1 - min(p_retailprice) over w1 = last_value(p_retailprice) over w1 - first_value(p_retailprice) over w1 +from part +window w1 as (distribute by p_mfgr sort by p_retailprice) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_retailprice p_size _c3 _c4 +Manufacturer#1 1173.15 2 true true +Manufacturer#1 1173.15 2 true true +Manufacturer#1 1414.42 28 true true +Manufacturer#1 1602.59 6 true true +Manufacturer#1 1632.66 42 true true +Manufacturer#1 1753.76 34 true true +Manufacturer#2 1690.68 14 true true +Manufacturer#2 1698.66 25 true true +Manufacturer#2 1701.6 18 true true +Manufacturer#2 1800.7 40 true true +Manufacturer#2 2031.98 2 true true +Manufacturer#3 1190.27 14 true true +Manufacturer#3 1337.29 45 true true +Manufacturer#3 1410.39 19 true true +Manufacturer#3 1671.68 17 true true +Manufacturer#3 1922.98 1 true true +Manufacturer#4 1206.26 27 true true +Manufacturer#4 1290.35 12 true true +Manufacturer#4 1375.42 39 true true +Manufacturer#4 1620.67 10 true true +Manufacturer#4 1844.92 7 true true +Manufacturer#5 1018.1 46 true true +Manufacturer#5 1464.48 23 true true +Manufacturer#5 1611.66 6 true true +Manufacturer#5 1788.73 2 true true +Manufacturer#5 1789.69 31 true true +PREHOOK: query: select p_mfgr, p_retailprice, p_size, +rank() over (distribute by p_mfgr sort by p_retailprice) as r, +sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) as s2, +sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) -5 as s1 +from part +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_retailprice, p_size, +rank() over (distribute by p_mfgr sort by p_retailprice) as r, +sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) as s2, +sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) -5 as s1 +from part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_retailprice p_size r s2 s1 +Manufacturer#1 1173.15 2 1 1173.15 1168.15 +Manufacturer#1 1173.15 2 1 2346.3 2341.3 +Manufacturer#1 1414.42 28 3 3760.7200000000003 3755.7200000000003 +Manufacturer#1 1602.59 6 4 5363.31 5358.31 +Manufacturer#1 1632.66 42 5 6995.97 6990.97 +Manufacturer#1 1753.76 34 6 8749.73 8744.73 +Manufacturer#2 1690.68 14 1 1690.68 1685.68 +Manufacturer#2 
1698.66 25 2 3389.34 3384.34 +Manufacturer#2 1701.6 18 3 5090.9400000000005 5085.9400000000005 +Manufacturer#2 1800.7 40 4 6891.64 6886.64 +Manufacturer#2 2031.98 2 5 8923.62 8918.62 +Manufacturer#3 1190.27 14 1 1190.27 1185.27 +Manufacturer#3 1337.29 45 2 2527.56 2522.56 +Manufacturer#3 1410.39 19 3 3937.95 3932.95 +Manufacturer#3 1671.68 17 4 5609.63 5604.63 +Manufacturer#3 1922.98 1 5 7532.610000000001 7527.610000000001 +Manufacturer#4 1206.26 27 1 1206.26 1201.26 +Manufacturer#4 1290.35 12 2 2496.6099999999997 2491.6099999999997 +Manufacturer#4 1375.42 39 3 3872.0299999999997 3867.0299999999997 +Manufacturer#4 1620.67 10 4 5492.7 5487.7 +Manufacturer#4 1844.92 7 5 7337.62 7332.62 +Manufacturer#5 1018.1 46 1 1018.1 1013.1 +Manufacturer#5 1464.48 23 2 2482.58 2477.58 +Manufacturer#5 1611.66 6 3 4094.24 4089.24 +Manufacturer#5 1788.73 2 4 5882.969999999999 5877.969999999999 +Manufacturer#5 1789.69 31 5 7672.66 7667.66 +PREHOOK: query: select p_mfgr, avg(p_retailprice) over(partition by p_mfgr, p_type order by p_mfgr) from part +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, avg(p_retailprice) over(partition by p_mfgr, p_type order by p_mfgr) from part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr avg_window_0 +Manufacturer#2 1800.7 +Manufacturer#4 1375.42 +Manufacturer#4 1620.67 +Manufacturer#4 1206.26 +Manufacturer#5 1788.73 +Manufacturer#1 1632.66 +Manufacturer#2 1690.68 +Manufacturer#2 1698.66 +Manufacturer#2 1701.6 +Manufacturer#3 1337.29 +Manufacturer#4 1844.92 +Manufacturer#4 1290.35 +Manufacturer#5 1018.1 +Manufacturer#5 1789.69 +Manufacturer#1 1753.76 +Manufacturer#1 1602.59 +Manufacturer#1 1173.15 +Manufacturer#1 1173.15 +Manufacturer#1 1414.42 +Manufacturer#2 2031.98 +Manufacturer#3 1922.98 +Manufacturer#3 1410.39 +Manufacturer#3 1190.27 +Manufacturer#5 1464.48 +Manufacturer#5 1611.66 +Manufacturer#3 1671.68 +PREHOOK: query: select p_mfgr, avg(p_retailprice) over(partition by p_mfgr order by p_type,p_mfgr rows between unbounded preceding and current row) from part +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, avg(p_retailprice) over(partition by p_mfgr order by p_type,p_mfgr rows between unbounded preceding and current row) from part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr avg_window_0 +Manufacturer#1 1753.76 +Manufacturer#1 1693.21 +Manufacturer#1 1663.0033333333333 +Manufacturer#1 1540.54 +Manufacturer#1 1467.062 +Manufacturer#1 1458.2883333333332 +Manufacturer#2 1800.7 +Manufacturer#2 1745.69 +Manufacturer#2 1841.1200000000001 +Manufacturer#2 1805.505 +Manufacturer#2 1784.7240000000002 +Manufacturer#3 1922.98 +Manufacturer#3 1666.685 +Manufacturer#3 1668.3500000000001 +Manufacturer#3 1548.83 +Manufacturer#3 1506.522 +Manufacturer#4 1844.92 +Manufacturer#4 1610.17 +Manufacturer#4 1613.67 +Manufacturer#4 1511.8175 +Manufacturer#4 1467.5240000000001 +Manufacturer#5 1018.1 +Manufacturer#5 1241.29 +Manufacturer#5 1424.0900000000001 +Manufacturer#5 1515.25 +Manufacturer#5 1534.532 +PREHOOK: query: from (select sum(i) over (partition by ts order by i), s from over10k) tt insert overwrite table t1 select * insert overwrite table t2 select * +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +PREHOOK: Output: default@t1 +PREHOOK: Output: default@t2 +POSTHOOK: query: from (select sum(i) over (partition by ts order by i), s 
from over10k) tt insert overwrite table t1 select * insert overwrite table t2 select *
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@over10k
+POSTHOOK: Output: default@t1
+POSTHOOK: Output: default@t2
+POSTHOOK: Lineage: t1.a1 SCRIPT [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), (over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), (over10k)over10k.FieldSchema(name:i, type:int, comment:null), (over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), (over10k)over10k.FieldSchema(name:f, type:float, comment:null), (over10k)over10k.FieldSchema(name:d, type:double, comment:null), (over10k)over10k.FieldSchema(name:bo, type:boolean, comment:null), (over10k)over10k.FieldSchema(name:s, type:string, comment:null), (over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), (over10k)over10k.FieldSchema(name:dec, type:decimal(4,2), comment:null), (over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), ]
+POSTHOOK: Lineage: t1.b1 SIMPLE [(over10k)over10k.FieldSchema(name:s, type:string, comment:null), ]
+POSTHOOK: Lineage: t2.a1 SCRIPT [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), (over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), (over10k)over10k.FieldSchema(name:i, type:int, comment:null), (over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), (over10k)over10k.FieldSchema(name:f, type:float, comment:null), (over10k)over10k.FieldSchema(name:d, type:double, comment:null), (over10k)over10k.FieldSchema(name:bo, type:boolean, comment:null), (over10k)over10k.FieldSchema(name:s, type:string, comment:null), (over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), (over10k)over10k.FieldSchema(name:dec, type:decimal(4,2), comment:null), (over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), ]
+POSTHOOK: Lineage: t2.b1 SIMPLE [(over10k)over10k.FieldSchema(name:s, type:string, comment:null), ]
+_col0 _col1
+PREHOOK: query: select * from t1 limit 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t1 limit 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+t1.a1 t1.b1
+65542 rachel thompson
+131088 oscar brown
+262258 wendy steinbeck
+PREHOOK: query: select * from t2 limit 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t2 limit 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+t2.a1 t2.b1
+65542 rachel thompson
+131088 oscar brown
+262258 wendy steinbeck
+PREHOOK: query: select p_mfgr, p_retailprice, p_size,
+round(sum(p_retailprice) over w1 , 2) + 50.0 = round(sum(lag(p_retailprice,1,50.0)) over w1 + (last_value(p_retailprice) over w1),2)
+from part
+window w1 as (distribute by p_mfgr sort by p_retailprice)
+limit 11
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_mfgr, p_retailprice, p_size,
+round(sum(p_retailprice) over w1 , 2) + 50.0 = round(sum(lag(p_retailprice,1,50.0)) over w1 + (last_value(p_retailprice) over w1),2)
+from part
+window w1 as (distribute by p_mfgr sort by p_retailprice)
+limit 11
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+p_mfgr p_retailprice p_size _c3
+Manufacturer#1 1173.15 2 true
+Manufacturer#1 1173.15 2 true
+Manufacturer#1 1414.42 28 true
+Manufacturer#1 1602.59 6 true
+Manufacturer#1 1632.66 42 true
+Manufacturer#1 1753.76 34 true
+Manufacturer#2 1690.68 14 true
+Manufacturer#2 1698.66 25 true
+Manufacturer#2 1701.6 18 true
+Manufacturer#2 1800.7 40 true
+Manufacturer#2 2031.98 2 true
diff --git ql/src/test/results/clientpositive/vector_windowing_expressions.q.out ql/src/test/results/clientpositive/vector_windowing_expressions.q.out
index beb01b4..26e2f9b 100644
--- ql/src/test/results/clientpositive/vector_windowing_expressions.q.out
+++ ql/src/test/results/clientpositive/vector_windowing_expressions.q.out
@@ -1820,3 +1820,226 @@ Manufacturer#2 1698.66 25 true
 Manufacturer#2 1701.6 18 true
 Manufacturer#2 1800.7 40 true
 Manufacturer#2 2031.98 2 true
+PREHOOK: query: select p_mfgr, p_retailprice, p_size,
+round(sum(p_retailprice) over w1 , 2) = round(sum(lag(p_retailprice,1,0.0)) over w1 + last_value(p_retailprice) over w1 , 2),
+max(p_retailprice) over w1 - min(p_retailprice) over w1 = last_value(p_retailprice) over w1 - first_value(p_retailprice) over w1
+from part
+window w1 as (distribute by p_mfgr sort by p_retailprice)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_mfgr, p_retailprice, p_size,
+round(sum(p_retailprice) over w1 , 2) = round(sum(lag(p_retailprice,1,0.0)) over w1 + last_value(p_retailprice) over w1 , 2),
+max(p_retailprice) over w1 - min(p_retailprice) over w1 = last_value(p_retailprice) over w1 - first_value(p_retailprice) over w1
+from part
+window w1 as (distribute by p_mfgr sort by p_retailprice)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+p_mfgr p_retailprice p_size _c3 _c4
+Manufacturer#1 1173.15 2 true true
+Manufacturer#1 1173.15 2 true true
+Manufacturer#1 1414.42 28 true true
+Manufacturer#1 1602.59 6 true true
+Manufacturer#1 1632.66 42 true true
+Manufacturer#1 1753.76 34 true true
+Manufacturer#2 1690.68 14 true true
+Manufacturer#2 1698.66 25 true true
+Manufacturer#2 1701.6 18 true true
+Manufacturer#2 1800.7 40 true true
+Manufacturer#2 2031.98 2 true true
+Manufacturer#3 1190.27 14 true true
+Manufacturer#3 1337.29 45 true true
+Manufacturer#3 1410.39 19 true true
+Manufacturer#3 1671.68 17 true true
+Manufacturer#3 1922.98 1 true true
+Manufacturer#4 1206.26 27 true true
+Manufacturer#4 1290.35 12 true true
+Manufacturer#4 1375.42 39 true true
+Manufacturer#4 1620.67 10 true true
+Manufacturer#4 1844.92 7 true true
+Manufacturer#5 1018.1 46 true true
+Manufacturer#5 1464.48 23 true true
+Manufacturer#5 1611.66 6 true true
+Manufacturer#5 1788.73 2 true true
+Manufacturer#5 1789.69 31 true true
+PREHOOK: query: select p_mfgr, p_retailprice, p_size,
+rank() over (distribute by p_mfgr sort by p_retailprice) as r,
+sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) as s2,
+sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) -5 as s1
+from part
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_mfgr, p_retailprice, p_size,
+rank() over (distribute by p_mfgr sort by p_retailprice) as r,
+sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) as s2,
+sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) -5 as s1
+from part
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+p_mfgr p_retailprice p_size r s2 s1
+Manufacturer#1 1173.15 2 1 1173.15 1168.15
+Manufacturer#1 1173.15 2 1 2346.3 2341.3
+Manufacturer#1 1414.42 28 3 3760.7200000000003 3755.7200000000003
+Manufacturer#1 1602.59 6 4 5363.31 5358.31
+Manufacturer#1 1632.66 42 5 6995.97 6990.97
+Manufacturer#1 1753.76 34 6 8749.73 8744.73
+Manufacturer#2 1690.68 14 1 1690.68 1685.68
+Manufacturer#2 1698.66 25 2 3389.34 3384.34
+Manufacturer#2 1701.6 18 3 5090.9400000000005 5085.9400000000005
+Manufacturer#2 1800.7 40 4 6891.64 6886.64
+Manufacturer#2 2031.98 2 5 8923.62 8918.62
+Manufacturer#3 1190.27 14 1 1190.27 1185.27
+Manufacturer#3 1337.29 45 2 2527.56 2522.56
+Manufacturer#3 1410.39 19 3 3937.95 3932.95
+Manufacturer#3 1671.68 17 4 5609.63 5604.63
+Manufacturer#3 1922.98 1 5 7532.610000000001 7527.610000000001
+Manufacturer#4 1206.26 27 1 1206.26 1201.26
+Manufacturer#4 1290.35 12 2 2496.6099999999997 2491.6099999999997
+Manufacturer#4 1375.42 39 3 3872.0299999999997 3867.0299999999997
+Manufacturer#4 1620.67 10 4 5492.7 5487.7
+Manufacturer#4 1844.92 7 5 7337.62 7332.62
+Manufacturer#5 1018.1 46 1 1018.1 1013.1
+Manufacturer#5 1464.48 23 2 2482.58 2477.58
+Manufacturer#5 1611.66 6 3 4094.24 4089.24
+Manufacturer#5 1788.73 2 4 5882.969999999999 5877.969999999999
+Manufacturer#5 1789.69 31 5 7672.66 7667.66
+PREHOOK: query: select p_mfgr, avg(p_retailprice) over(partition by p_mfgr, p_type order by p_mfgr) from part
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_mfgr, avg(p_retailprice) over(partition by p_mfgr, p_type order by p_mfgr) from part
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+p_mfgr avg_window_0
+Manufacturer#1 1753.76
+Manufacturer#1 1632.66
+Manufacturer#1 1602.59
+Manufacturer#1 1173.15
+Manufacturer#1 1173.15
+Manufacturer#1 1414.42
+Manufacturer#2 1800.7
+Manufacturer#2 1690.68
+Manufacturer#2 2031.98
+Manufacturer#2 1698.66
+Manufacturer#2 1701.6
+Manufacturer#3 1922.98
+Manufacturer#3 1410.39
+Manufacturer#3 1671.68
+Manufacturer#3 1190.27
+Manufacturer#3 1337.29
+Manufacturer#4 1844.92
+Manufacturer#4 1375.42
+Manufacturer#4 1620.67
+Manufacturer#4 1206.26
+Manufacturer#4 1290.35
+Manufacturer#5 1018.1
+Manufacturer#5 1464.48
+Manufacturer#5 1789.69
+Manufacturer#5 1788.73
+Manufacturer#5 1611.66
+PREHOOK: query: select p_mfgr, avg(p_retailprice) over(partition by p_mfgr order by p_type,p_mfgr rows between unbounded preceding and current row) from part
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_mfgr, avg(p_retailprice) over(partition by p_mfgr order by p_type,p_mfgr rows between unbounded preceding and current row) from part
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+p_mfgr avg_window_0
+Manufacturer#1 1753.76
+Manufacturer#1 1693.21
+Manufacturer#1 1663.0033333333333
+Manufacturer#1 1540.54
+Manufacturer#1 1467.062
+Manufacturer#1 1458.2883333333332
+Manufacturer#2 1800.7
+Manufacturer#2 1745.69
+Manufacturer#2 1841.1200000000001
+Manufacturer#2 1805.505
+Manufacturer#2 1784.7240000000002
+Manufacturer#3 1922.98
+Manufacturer#3 1666.685
+Manufacturer#3 1668.3500000000001
+Manufacturer#3 1548.83
+Manufacturer#3 1506.522
+Manufacturer#4 1844.92
+Manufacturer#4 1610.17
+Manufacturer#4 1613.67
+Manufacturer#4 1511.8175
+Manufacturer#4 1467.5240000000001
+Manufacturer#5 1018.1
+Manufacturer#5 1241.29
+Manufacturer#5 1424.0900000000001
+Manufacturer#5 1515.25
+Manufacturer#5 1534.532
+PREHOOK: query: from (select sum(i) over (partition by ts order by i), s from over10k) tt insert overwrite table t1 select * insert overwrite table t2 select *
+PREHOOK: type: QUERY
+PREHOOK: Input: default@over10k
+PREHOOK: Output: default@t1
+PREHOOK: Output: default@t2
+POSTHOOK: query: from (select sum(i) over (partition by ts order by i), s from over10k) tt insert overwrite table t1 select * insert overwrite table t2 select *
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@over10k
+POSTHOOK: Output: default@t1
+POSTHOOK: Output: default@t2
+POSTHOOK: Lineage: t1.a1 SCRIPT [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), (over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), (over10k)over10k.FieldSchema(name:i, type:int, comment:null), (over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), (over10k)over10k.FieldSchema(name:f, type:float, comment:null), (over10k)over10k.FieldSchema(name:d, type:double, comment:null), (over10k)over10k.FieldSchema(name:bo, type:boolean, comment:null), (over10k)over10k.FieldSchema(name:s, type:string, comment:null), (over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), (over10k)over10k.FieldSchema(name:dec, type:decimal(4,2), comment:null), (over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), ]
+POSTHOOK: Lineage: t1.b1 SIMPLE [(over10k)over10k.FieldSchema(name:s, type:string, comment:null), ]
+POSTHOOK: Lineage: t2.a1 SCRIPT [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), (over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), (over10k)over10k.FieldSchema(name:i, type:int, comment:null), (over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), (over10k)over10k.FieldSchema(name:f, type:float, comment:null), (over10k)over10k.FieldSchema(name:d, type:double, comment:null), (over10k)over10k.FieldSchema(name:bo, type:boolean, comment:null), (over10k)over10k.FieldSchema(name:s, type:string, comment:null), (over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), (over10k)over10k.FieldSchema(name:dec, type:decimal(4,2), comment:null), (over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), ]
+POSTHOOK: Lineage: t2.b1 SIMPLE [(over10k)over10k.FieldSchema(name:s, type:string, comment:null), ]
+_col0 _col1
+PREHOOK: query: select * from t1 limit 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t1 limit 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+t1.a1 t1.b1
+65542 rachel thompson
+131088 oscar brown
+262258 wendy steinbeck
+PREHOOK: query: select * from t2 limit 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t2 limit 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+t2.a1 t2.b1
+65542 rachel thompson
+131088 oscar brown
+262258 wendy steinbeck
+PREHOOK: query: select p_mfgr, p_retailprice, p_size,
+round(sum(p_retailprice) over w1 , 2) + 50.0 = round(sum(lag(p_retailprice,1,50.0)) over w1 + (last_value(p_retailprice) over w1),2)
+from part
+window w1 as (distribute by p_mfgr sort by p_retailprice)
+limit 11
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_mfgr, p_retailprice, p_size,
+round(sum(p_retailprice) over w1 , 2) + 50.0 = round(sum(lag(p_retailprice,1,50.0)) over w1 + (last_value(p_retailprice) over w1),2)
+from part
+window w1 as (distribute by p_mfgr sort by p_retailprice)
+limit 11
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+p_mfgr p_retailprice p_size _c3
+Manufacturer#1 1173.15 2 true
+Manufacturer#1 1173.15 2 true
+Manufacturer#1 1414.42 28 true
+Manufacturer#1 1602.59 6 true
+Manufacturer#1 1632.66 42 true
+Manufacturer#1 1753.76 34 true
+Manufacturer#2 1690.68 14 true
+Manufacturer#2 1698.66 25 true
+Manufacturer#2 1701.6 18 true
+Manufacturer#2 1800.7 40 true
+Manufacturer#2 2031.98 2 true
diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
index f914a22..c4f19cf 100644
--- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
+++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
@@ -78,7 +78,7 @@ public BytesColumnVector() {
    * @param size number of elements in the column vector
    */
   public BytesColumnVector(int size) {
-    super(size);
+    super(Type.BYTES, size);
     vector = new byte[size][];
     start = new int[size];
     length = new int[size];
diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
index 065c1fa..0e76286 100644
--- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
+++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
@@ -49,6 +49,8 @@
     UNION
   }
 
+  public final Type type;
+
   /*
    * If hasNulls is true, then this array contains true if the value
    * is null, otherwise false. The array is always allocated, so a batch can be re-used
@@ -75,7 +77,9 @@
    *
+   * @param type the data type of this column vector
    * @param len Vector length
    */
-  public ColumnVector(int len) {
+  public ColumnVector(Type type, int len) {
+    this.type = type;
     isNull = new boolean[len];
     noNulls = true;
     isRepeating = false;
diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
index 67076eb..cfe40ac 100644
--- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
+++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
@@ -40,7 +40,7 @@ public DecimalColumnVector(int precision, int scale) {
   }
 
   public DecimalColumnVector(int size, int precision, int scale) {
-    super(size);
+    super(Type.DECIMAL, size);
     this.precision = (short) precision;
     this.scale = (short) scale;
     vector = new HiveDecimalWritable[size];
diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
index 11409bd..1395144 100644
--- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
+++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
@@ -48,7 +48,7 @@ public DoubleColumnVector() {
    * @param len
    */
   public DoubleColumnVector(int len) {
-    super(len);
+    super(Type.DOUBLE, len);
     vector = new double[len];
   }
 
diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java
index e876c05..587e2b9 100644
--- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java
+++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java
@@ -69,7 +69,7 @@ public IntervalDayTimeColumnVector() {
    * @param len the number of rows
    */
   public IntervalDayTimeColumnVector(int len) {
-    super(len);
+    super(Type.INTERVAL_DAY_TIME, len);
     totalSeconds = new long[len];
     nanos = new int[len];
 
diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.java
index 66240dd..02a8b3c 100644
--- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.java
+++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.java
@@ -40,7 +40,7 @@ public ListColumnVector() {
    * @param child The child vector
    */
   public ListColumnVector(int len, ColumnVector child) {
-    super(len);
+    super(Type.LIST, len);
     this.child = child;
   }
 
diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
index 3ae6a33..a407bcd 100644
--- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
+++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
@@ -48,7 +48,7 @@ public LongColumnVector() {
    * @param len the number of rows
    */
   public LongColumnVector(int len) {
-    super(len);
+    super(Type.LONG, len);
     vector = new long[len];
   }
 
diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MapColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MapColumnVector.java
index e8421e3..64badb9 100644
--- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MapColumnVector.java
+++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MapColumnVector.java
@@ -42,7 +42,7 @@ public MapColumnVector() {
    * @param values The values column vector
    */
   public MapColumnVector(int len, ColumnVector keys, ColumnVector values) {
-    super(len);
+    super(Type.MAP, len);
     this.keys = keys;
     this.values = values;
   }
diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MultiValuedColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MultiValuedColumnVector.java
index 892e8d8..6613635 100644
--- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MultiValuedColumnVector.java
+++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MultiValuedColumnVector.java
@@ -40,8 +40,8 @@
    *
    * @param len Vector length
    */
-  public MultiValuedColumnVector(int len) {
-    super(len);
+  public MultiValuedColumnVector(Type type, int len) {
+    super(type, len);
     childCount = 0;
     offsets = new long[len];
     lengths = new long[len];
diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/StructColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/StructColumnVector.java
index a361899..45f3ac6 100644
--- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/StructColumnVector.java
+++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/StructColumnVector.java
@@ -40,7 +40,7 @@ public StructColumnVector() {
    * @param fields the field column vectors
    */
   public StructColumnVector(int len, ColumnVector... fields) {
-    super(len);
+    super(Type.STRUCT, len);
     this.fields = fields;
   }
 
diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java
index 9d579ce..ef1c817 100644
--- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java
+++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java
@@ -67,7 +67,7 @@ public TimestampColumnVector() {
    * @param len the number of rows
    */
   public TimestampColumnVector(int len) {
-    super(len);
+    super(Type.TIMESTAMP, len);
     time = new long[len];
     nanos = new int[len];
 
diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/UnionColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/UnionColumnVector.java
index 151f791..e844b11 100644
--- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/UnionColumnVector.java
+++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/UnionColumnVector.java
@@ -41,7 +41,7 @@ public UnionColumnVector() {
    * @param fields the field column vectors
    */
   public UnionColumnVector(int len, ColumnVector... fields) {
-    super(len);
+    super(Type.UNION, len);
     tags = new int[len];
     this.fields = fields;
   }
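The storage-api hunks above make every ColumnVector self-describing: each concrete subclass now passes its logical type to the base-class constructor, and the tag is readable through the new public final field. Below is a minimal sketch of what this enables for calling code, assuming only the API visible in the hunks; the ColumnVectorTypeDemo class and its describe helper are illustrative names, not part of this patch.

import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class ColumnVectorTypeDemo {

  // Hypothetical helper: dispatch on the new public final 'type' field
  // instead of probing concrete classes with an instanceof chain.
  static String describe(ColumnVector cv) {
    switch (cv.type) {
      case LONG:
        // The cast is only taken on the branch where it is known to be safe.
        return "LONG, first value = " + ((LongColumnVector) cv).vector[0];
      case BYTES:
        return "BYTES, " + cv.isNull.length + " slots";
      default:
        return cv.type.toString();
    }
  }

  public static void main(String[] args) {
    // Each subclass constructor now forwards its Type to the ColumnVector
    // base class, e.g. LongColumnVector calls super(Type.LONG, len).
    LongColumnVector longs = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
    longs.vector[0] = 42L;
    BytesColumnVector strings = new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE);

    System.out.println(describe(longs));   // LONG, first value = 42
    System.out.println(describe(strings)); // BYTES, 1024 slots
  }
}

One design consequence worth noting: generic utilities that receive a bare ColumnVector reference can now branch on the tag with a plain enum switch, deferring any cast until a branch has already established the concrete type.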