diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 0c590c8..dca8567 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -610,6 +610,20 @@ minillaplocal.query.files=acid_globallimit.q,\ vector_auto_smb_mapjoin_14.q,\ vector_decimal_2.q,\ vector_decimal_udf.q,\ + vector_groupby_cube1.q,\ + vector_groupby_grouping_id1.q,\ + vector_groupby_grouping_id2.q,\ + vector_groupby_grouping_id3.q,\ + vector_groupby_grouping_sets1.q,\ + vector_groupby_grouping_sets2.q,\ + vector_groupby_grouping_sets3.q,\ + vector_groupby_grouping_sets4.q,\ + vector_groupby_grouping_sets5.q,\ + vector_groupby_grouping_sets6.q,\ + vector_groupby_grouping_sets_grouping.q,\ + vector_groupby_grouping_sets_limit.q,\ + vector_groupby_grouping_window.q,\ + vector_groupby_rollup1.q,\ vector_join30.q,\ vector_join_filters.q,\ vector_leftsemi_mapjoin.q,\ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSetInfo.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSetInfo.java index 935b47b..7ac4f07 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSetInfo.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSetInfo.java @@ -20,10 +20,8 @@ import java.util.Arrays; -import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; /** * Class to keep information on a set of typed vector columns. Used by @@ -64,147 +62,87 @@ */ protected int[] intervalDayTimeIndices; - /** - * Helper class for looking up a key value based on key index. - */ - public class KeyLookupHelper { - public int longIndex; - public int doubleIndex; - public int stringIndex; - public int decimalIndex; - public int timestampIndex; - public int intervalDayTimeIndex; - - private static final int INDEX_UNUSED = -1; - - private void resetIndices() { - this.longIndex = this.doubleIndex = this.stringIndex = this.decimalIndex = - timestampIndex = intervalDayTimeIndex = INDEX_UNUSED; - } - public void setLong(int index) { - resetIndices(); - this.longIndex= index; - } - - public void setDouble(int index) { - resetIndices(); - this.doubleIndex = index; - } - - public void setString(int index) { - resetIndices(); - this.stringIndex = index; - } - - public void setDecimal(int index) { - resetIndices(); - this.decimalIndex = index; - } - - public void setTimestamp(int index) { - resetIndices(); - this.timestampIndex= index; - } - - public void setIntervalDayTime(int index) { - resetIndices(); - this.intervalDayTimeIndex= index; - } - } - - /** - * Lookup vector to map from key index to primitive type index. 
- */ - protected KeyLookupHelper[] indexLookup; + final protected int keyCount; + private int addKeyIndex; - private int keyCount; - private int addIndex; + private int addLongIndex; + private int addDoubleIndex; + private int addStringIndex; + private int addDecimalIndex; + private int addTimestampIndex; + private int addIntervalDayTimeIndex; - protected int longIndicesIndex; - protected int doubleIndicesIndex; - protected int stringIndicesIndex; - protected int decimalIndicesIndex; - protected int timestampIndicesIndex; - protected int intervalDayTimeIndicesIndex; + // Given the keyIndex these arrays return: + // The ColumnVector.Type, + // The type specific index into longIndices, doubleIndices, etc... + protected ColumnVector.Type[] columnVectorTypes; + protected int[] columnTypeSpecificIndices; protected VectorColumnSetInfo(int keyCount) { this.keyCount = keyCount; - this.addIndex = 0; + this.addKeyIndex = 0; // We'll over allocate and then shrink the array for each type longIndices = new int[this.keyCount]; - longIndicesIndex = 0; + addLongIndex = 0; doubleIndices = new int[this.keyCount]; - doubleIndicesIndex = 0; + addDoubleIndex = 0; stringIndices = new int[this.keyCount]; - stringIndicesIndex = 0; + addStringIndex = 0; decimalIndices = new int[this.keyCount]; - decimalIndicesIndex = 0; + addDecimalIndex = 0; timestampIndices = new int[this.keyCount]; - timestampIndicesIndex = 0; + addTimestampIndex = 0; intervalDayTimeIndices = new int[this.keyCount]; - intervalDayTimeIndicesIndex = 0; - indexLookup = new KeyLookupHelper[this.keyCount]; - } + addIntervalDayTimeIndex = 0; - protected void addKey(String outputType) throws HiveException { - indexLookup[addIndex] = new KeyLookupHelper(); + columnVectorTypes = new ColumnVector.Type[this.keyCount]; + columnTypeSpecificIndices = new int[this.keyCount]; + } - String typeName = VectorizationContext.mapTypeNameSynonyms(outputType); - TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName); - Type columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo); + protected void addKey(ColumnVector.Type columnVectorType) throws HiveException { switch (columnVectorType) { case LONG: - longIndices[longIndicesIndex] = addIndex; - indexLookup[addIndex].setLong(longIndicesIndex); - ++longIndicesIndex; + longIndices[addLongIndex] = addKeyIndex; + columnTypeSpecificIndices[addKeyIndex] = addLongIndex++; break; - case DOUBLE: - doubleIndices[doubleIndicesIndex] = addIndex; - indexLookup[addIndex].setDouble(doubleIndicesIndex); - ++doubleIndicesIndex; + doubleIndices[addDoubleIndex] = addKeyIndex; + columnTypeSpecificIndices[addKeyIndex] = addDoubleIndex++; break; - case BYTES: - stringIndices[stringIndicesIndex]= addIndex; - indexLookup[addIndex].setString(stringIndicesIndex); - ++stringIndicesIndex; + stringIndices[addStringIndex]= addKeyIndex; + columnTypeSpecificIndices[addKeyIndex] = addStringIndex++; break; - case DECIMAL: - decimalIndices[decimalIndicesIndex]= addIndex; - indexLookup[addIndex].setDecimal(decimalIndicesIndex); - ++decimalIndicesIndex; - break; - + decimalIndices[addDecimalIndex]= addKeyIndex; + columnTypeSpecificIndices[addKeyIndex] = addDecimalIndex++; + break; case TIMESTAMP: - timestampIndices[timestampIndicesIndex] = addIndex; - indexLookup[addIndex].setTimestamp(timestampIndicesIndex); - ++timestampIndicesIndex; + timestampIndices[addTimestampIndex] = addKeyIndex; + columnTypeSpecificIndices[addKeyIndex] = addTimestampIndex++; break; - case INTERVAL_DAY_TIME: - 
intervalDayTimeIndices[intervalDayTimeIndicesIndex] = addIndex; - indexLookup[addIndex].setIntervalDayTime(intervalDayTimeIndicesIndex); - ++intervalDayTimeIndicesIndex; + intervalDayTimeIndices[addIntervalDayTimeIndex] = addKeyIndex; + columnTypeSpecificIndices[addKeyIndex] = addIntervalDayTimeIndex++; break; - default: throw new HiveException("Unexpected column vector type " + columnVectorType); } - addIndex++; + columnVectorTypes[addKeyIndex] = columnVectorType; + addKeyIndex++; } - protected void finishAdding() { - longIndices = Arrays.copyOf(longIndices, longIndicesIndex); - doubleIndices = Arrays.copyOf(doubleIndices, doubleIndicesIndex); - stringIndices = Arrays.copyOf(stringIndices, stringIndicesIndex); - decimalIndices = Arrays.copyOf(decimalIndices, decimalIndicesIndex); - timestampIndices = Arrays.copyOf(timestampIndices, timestampIndicesIndex); - intervalDayTimeIndices = Arrays.copyOf(intervalDayTimeIndices, intervalDayTimeIndicesIndex); + + protected void finishAdding() throws HiveException { + longIndices = Arrays.copyOf(longIndices, addLongIndex); + doubleIndices = Arrays.copyOf(doubleIndices, addDoubleIndex); + stringIndices = Arrays.copyOf(stringIndices, addStringIndex); + decimalIndices = Arrays.copyOf(decimalIndices, addDecimalIndex); + timestampIndices = Arrays.copyOf(timestampIndices, addTimestampIndex); + intervalDayTimeIndices = Arrays.copyOf(intervalDayTimeIndices, addIntervalDayTimeIndex); } } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java index fef7c2a..90748e9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java @@ -22,16 +22,21 @@ import java.lang.management.MemoryMXBean; import java.lang.ref.SoftReference; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; +import org.apache.commons.lang.ArrayUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.GroupByOperator; import org.apache.hadoop.hive.ql.exec.KeyWrapper; import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory; @@ -52,6 +57,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import javolution.util.FastBitSet; + import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; @@ -110,6 +117,24 @@ private transient VectorAssignRow vectorAssignRow; + /* + * Grouping sets members. + */ + private transient boolean groupingSetsPresent; + + // The field bits (i.e. which fields to include) or "id" for each grouping set. + private transient int[] groupingSets; + + // The position in the column keys of the dummy grouping set id column. + private transient int groupingSetsPosition; + + // The planner puts a constant field in for the dummy grouping set id. 
We will overwrite it + // as we process the grouping sets. + private transient ConstantVectorExpression groupingSetsDummyVectorExpression; + + // We translate the grouping set bit field into boolean arrays. + private transient boolean[][] allGroupingSetsOverrideIsNulls; + private transient int numEntriesHashTable; private transient long maxHashTblMemory; @@ -144,6 +169,32 @@ public void endGroup() throws HiveException { // Do nothing. } + protected abstract void doProcessBatch(VectorizedRowBatch batch, boolean isFirstGroupingSet, + boolean[] currentGroupingSetsOverrideIsNulls) throws HiveException; + + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { + + if (!groupingSetsPresent) { + doProcessBatch(batch, false, null); + return; + } + + // We drive the doProcessBatch logic with the same batch but a different + // grouping set id and null variation for each pass. + // PERFORMANCE NOTE: We do not try to reuse columns; we generate the KeyWrappers anew... + + final int size = groupingSets.length; + for (int i = 0; i < size; i++) { + + // NOTE: We are overwriting the constant vector value... + groupingSetsDummyVectorExpression.setLongValue(groupingSets[i]); + groupingSetsDummyVectorExpression.evaluate(batch); + + doProcessBatch(batch, (i == 0), allGroupingSetsOverrideIsNulls[i]); + } + } + /** * Evaluates the aggregators on the current batch. * The aggregationBatchInfo must have been prepared @@ -207,7 +258,8 @@ public void initialize(Configuration hconf) throws HiveException { } @Override - public void processBatch(VectorizedRowBatch batch) throws HiveException { + public void doProcessBatch(VectorizedRowBatch batch, boolean isFirstGroupingSet, + boolean[] currentGroupingSetsOverrideIsNulls) throws HiveException { for (int i = 0; i < aggregators.length; ++i) { aggregators[i].aggregateInput(aggregationBuffers.getAggregationBuffer(i), batch); } @@ -328,11 +380,24 @@ public void initialize(Configuration hconf) throws HiveException { } @Override - public void processBatch(VectorizedRowBatch batch) throws HiveException { + public void doProcessBatch(VectorizedRowBatch batch, boolean isFirstGroupingSet, + boolean[] currentGroupingSetsOverrideIsNulls) throws HiveException { + + if (!groupingSetsPresent || isFirstGroupingSet) { + + // Evaluate the key expressions once. + for(int i = 0; i < keyExpressions.length; ++i) { + keyExpressions[i].evaluate(batch); + } + } // First we traverse the batch to evaluate and prepare the KeyWrappers // After this the KeyWrappers are properly set and hash code is computed - keyWrappersBatch.evaluateBatch(batch); + if (!groupingSetsPresent) { + keyWrappersBatch.evaluateBatch(batch); + } else { + keyWrappersBatch.evaluateBatchGroupingSets(batch, currentGroupingSetsOverrideIsNulls); + } // Next we locate the aggregation buffer set for each key prepareBatchAggregationBufferSets(batch); @@ -607,10 +672,24 @@ public void free(VectorAggregationBufferRow t) { } @Override - public void processBatch(VectorizedRowBatch batch) throws HiveException { + public void doProcessBatch(VectorizedRowBatch batch, boolean isFirstGroupingSet, + boolean[] currentGroupingSetsOverrideIsNulls) throws HiveException { + + if (!groupingSetsPresent || isFirstGroupingSet) { + + // Evaluate the key expressions once.
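+ // (Subsequent grouping set passes reuse these evaluated key columns; processBatch() only rewrites the constant dummy grouping set id column between passes.)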
+ for(int i = 0; i < keyExpressions.length; ++i) { + keyExpressions[i].evaluate(batch); + } + } + // First we traverse the batch to evaluate and prepare the KeyWrappers // After this the KeyWrappers are properly set and hash code is computed - keyWrappersBatch.evaluateBatch(batch); + if (!groupingSetsPresent) { + keyWrappersBatch.evaluateBatch(batch); + } else { + keyWrappersBatch.evaluateBatchGroupingSets(batch, currentGroupingSetsOverrideIsNulls); + } VectorHashKeyWrapper[] batchKeys = keyWrappersBatch.getVectorHashKeyWrappers(); @@ -702,7 +781,10 @@ public void close(boolean aborted) throws HiveException { @Override public void initialize(Configuration hconf) throws HiveException { inGroup = false; - groupKeyHelper = new VectorGroupKeyHelper(keyExpressions.length); + + // We do not include the dummy grouping set column in the output. So we pass outputKeyLength + // instead of keyExpressions.length + groupKeyHelper = new VectorGroupKeyHelper(outputKeyLength); groupKeyHelper.init(keyExpressions); groupAggregators = allocateAggregationBuffer(); buffer = new DataOutputBuffer(); @@ -725,7 +807,8 @@ public void endGroup() throws HiveException { } @Override - public void processBatch(VectorizedRowBatch batch) throws HiveException { + public void doProcessBatch(VectorizedRowBatch batch, boolean isFirstGroupingSet, + boolean[] currentGroupingSetsOverrideIsNulls) throws HiveException { assert(inGroup); if (first) { // Copy the group key to output batch now. We'll copy in the aggregates at the end of the group. @@ -778,6 +861,49 @@ public VectorGroupByOperator(CompilationOpContext ctx) { super(ctx); } + private void setupGroupingSets() { + + groupingSetsPresent = conf.isGroupingSetsPresent(); + if (!groupingSetsPresent) { + groupingSets = null; + groupingSetsPosition = -1; + groupingSetsDummyVectorExpression = null; + allGroupingSetsOverrideIsNulls = null; + return; + } + + groupingSets = ArrayUtils.toPrimitive(conf.getListGroupingSets().toArray(new Integer[0])); + groupingSetsPosition = conf.getGroupingSetPosition(); + + allGroupingSetsOverrideIsNulls = new boolean[groupingSets.length][]; + + int pos = 0; + for (int groupingSet: groupingSets) { + + // Create the mapping corresponding to the grouping set + + // Assume all columns are null, except the dummy column is always non-null. + boolean[] groupingSetsOverrideIsNull = new boolean[keyExpressions.length]; + Arrays.fill(groupingSetsOverrideIsNull, true); + groupingSetsOverrideIsNull[groupingSetsPosition] = false; + + // Add keys of this grouping set. + FastBitSet bitset = GroupByOperator.groupingSet2BitSet(groupingSet, groupingSetsPosition); + for (int keyPos = bitset.nextClearBit(0); keyPos < groupingSetsPosition; + keyPos = bitset.nextClearBit(keyPos+1)) { + groupingSetsOverrideIsNull[keyPos] = false; + } + + allGroupingSetsOverrideIsNulls[pos] = groupingSetsOverrideIsNull; + pos++; + } + + // The last key column is the dummy grouping set id. + // + // Figure out which (scratch) column was used so we can overwrite the dummy id. 
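+ // The planner emits the dummy grouping set id as a constant expression, so the cast below to ConstantVectorExpression is expected to hold.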
+ + groupingSetsDummyVectorExpression = (ConstantVectorExpression) keyExpressions[groupingSetsPosition]; + } @Override protected void initializeOp(Configuration hconf) throws HiveException { @@ -831,15 +957,19 @@ protected void initializeOp(Configuration hconf) throws HiveException { forwardCache = new Object[outputKeyLength + aggregators.length]; + setupGroupingSets(); + switch (vectorDesc.getProcessingMode()) { case GLOBAL: Preconditions.checkState(outputKeyLength == 0); + Preconditions.checkState(!groupingSetsPresent); processingMode = this.new ProcessingModeGlobalAggregate(); break; case HASH: processingMode = this.new ProcessingModeHashAggregate(); break; case MERGE_PARTIAL: + Preconditions.checkState(!groupingSetsPresent); processingMode = this.new ProcessingModeReduceMergePartial(); break; case STREAMING: diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupKeyHelper.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupKeyHelper.java index 50d0452..0ff389e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupKeyHelper.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupKeyHelper.java @@ -19,8 +19,12 @@ package org.apache.hadoop.hive.ql.exec.vector; import java.io.IOException; + +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.io.DataOutputBuffer; /** @@ -33,9 +37,16 @@ public VectorGroupKeyHelper(int keyCount) { } void init(VectorExpression[] keyExpressions) throws HiveException { + + // NOTE: To support pruning of the grouping set id dummy key in the VectorGroupByOperator MERGE_PARTIAL + // case, we use the keyCount passed to the constructor and not keyExpressions.length. + // Inspect the output type of each key expression. - for(int i=0; i < keyExpressions.length; ++i) { - addKey(keyExpressions[i].getOutputType()); + for(int i=0; i < keyCount; ++i) { + String typeName = VectorizationContext.mapTypeNameSynonyms(keyExpressions[i].getOutputType()); + TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName); + Type columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo); + addKey(columnVectorType); } finishAdding(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java index 5de59b1..2c51882 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java @@ -30,6 +30,8 @@ import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import com.google.common.base.Preconditions; + /** * A hash map key wrapper for vectorized processing. * It stores the key values as primitives in arrays for each supported primitive type. @@ -59,15 +61,22 @@ private HiveDecimalWritable[] decimalValues; private Timestamp[] timestampValues; + private static Timestamp ZERO_TIMESTAMP = new Timestamp(0); private HiveIntervalDayTime[] intervalDayTimeValues; + private static HiveIntervalDayTime ZERO_INTERVALDAYTIME = new HiveIntervalDayTime(0, 0); + // NOTE: The null array is indexed by keyIndex, which is not available internally.
The mapping + // from a long, double, etc index to key index is kept once in the separate + // VectorColumnSetInfo object. private boolean[] isNull; + private int hashcode; private VectorHashKeyWrapper(int longValuesCount, int doubleValuesCount, int byteValuesCount, int decimalValuesCount, int timestampValuesCount, - int intervalDayTimeValuesCount) { + int intervalDayTimeValuesCount, + int keyCount) { longValues = longValuesCount > 0 ? new long[longValuesCount] : EMPTY_LONG_ARRAY; doubleValues = doubleValuesCount > 0 ? new double[doubleValuesCount] : EMPTY_DOUBLE_ARRAY; decimalValues = decimalValuesCount > 0 ? new HiveDecimalWritable[decimalValuesCount] : EMPTY_DECIMAL_ARRAY; @@ -91,8 +100,7 @@ private VectorHashKeyWrapper(int longValuesCount, int doubleValuesCount, for(int i = 0; i < intervalDayTimeValuesCount; ++i) { intervalDayTimeValues[i] = new HiveIntervalDayTime(); } - isNull = new boolean[longValuesCount + doubleValuesCount + byteValuesCount + - decimalValuesCount + timestampValuesCount + intervalDayTimeValuesCount]; + isNull = new boolean[keyCount]; hashcode = 0; } @@ -101,13 +109,14 @@ private VectorHashKeyWrapper() { public static VectorHashKeyWrapper allocate(int longValuesCount, int doubleValuesCount, int byteValuesCount, int decimalValuesCount, int timestampValuesCount, - int intervalDayTimeValuesCount) { + int intervalDayTimeValuesCount, int keyCount) { if ((longValuesCount + doubleValuesCount + byteValuesCount + decimalValuesCount + timestampValuesCount + intervalDayTimeValuesCount) == 0) { return EMPTY_KEY_WRAPPER; } return new VectorHashKeyWrapper(longValuesCount, doubleValuesCount, byteValuesCount, - decimalValuesCount, timestampValuesCount, intervalDayTimeValuesCount); + decimalValuesCount, timestampValuesCount, intervalDayTimeValuesCount, + keyCount); } @Override @@ -140,19 +149,14 @@ public void setHashKey() { * Hashing the string is potentially expensive so is better to branch. * Additionally not looking at values for nulls allows us not reset the values. */ - if (!isNull[longValues.length + doubleValues.length + i]) { + if (byteLengths[i] != -1) { byte[] bytes = byteValues[i]; int start = byteStarts[i]; int length = byteLengths[i]; - if (length == bytes.length && start == 0) { - hashcode ^= Arrays.hashCode(bytes); - } - else { - // Unfortunately there is no Arrays.hashCode(byte[], start, length) - for(int j = start; j < start + length; ++j) { - // use 461 as is a (sexy!) prime. - hashcode ^= 461 * bytes[j]; - } + // Unfortunately there is no Arrays.hashCode(byte[], start, length) + for(int j = start; j < start + length; ++j) { + // use 461 as is a (sexy!) prime. + hashcode ^= 461 * bytes[j]; } } } @@ -184,7 +188,7 @@ private boolean bytesEquals(VectorHashKeyWrapper keyThat) { //By the time we enter here the byteValues.lentgh and isNull must have already been compared for (int i = 0; i < byteValues.length; ++i) { // the byte comparison is potentially expensive so is better to branch on null - if (!isNull[longValues.length + doubleValues.length + i]) { + if (byteLengths[i] != -1) { if (!StringExpr.equal( byteValues[i], byteStarts[i], @@ -228,7 +232,7 @@ public void duplicateTo(VectorHashKeyWrapper clone) { for (int i = 0; i < byteValues.length; ++i) { // avoid allocation/copy of nulls, because it potentially expensive. // branch instead. 
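+ // byteLengths[i] == -1 now marks a NULL string key (see assignNullString below).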
- if (!isNull[longValues.length + doubleValues.length + i]) { + if (byteLengths[i] != -1) { clone.byteValues[i] = Arrays.copyOfRange(byteValues[i], byteStarts[i], byteStarts[i] + byteLengths[i]); } @@ -274,106 +278,141 @@ public void copyKey(KeyWrapper oldWrapper) { throw new UnsupportedOperationException(); } - public void assignDouble(int index, double d) { - doubleValues[index] = d; - isNull[longValues.length + index] = false; + public void assignLong(int index, long v) { + longValues[index] = v; } - public void assignNullDouble(int index) { - doubleValues[index] = 0; // assign 0 to simplify hashcode - isNull[longValues.length + index] = true; + public void assignNullLong(int keyIndex, int index) { + isNull[keyIndex] = true; + longValues[index] = 0; // assign 0 to simplify hashcode } - public void assignLong(int index, long v) { - longValues[index] = v; - isNull[index] = false; + public void assignDouble(int index, double d) { + doubleValues[index] = d; } - public void assignNullLong(int index) { - longValues[index] = 0; // assign 0 to simplify hashcode - isNull[index] = true; + public void assignNullDouble(int keyIndex, int index) { + isNull[keyIndex] = true; + doubleValues[index] = 0; // assign 0 to simplify hashcode } public void assignString(int index, byte[] bytes, int start, int length) { + Preconditions.checkState(bytes != null); byteValues[index] = bytes; byteStarts[index] = start; byteLengths[index] = length; - isNull[longValues.length + doubleValues.length + index] = false; } - public void assignNullString(int index) { - // We do not assign the value to byteValues[] because the value is never used on null - isNull[longValues.length + doubleValues.length + index] = true; + public void assignNullString(int keyIndex, int index) { + isNull[keyIndex] = true; + byteValues[index] = null; + byteStarts[index] = 0; + // We need some value that indicates NULL. 
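+ // A length of -1 cannot occur for a real key; setHashKey(), bytesEquals(), and duplicateTo() all test byteLengths[i] != -1 to skip NULL string keys.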
+ byteLengths[index] = -1; } public void assignDecimal(int index, HiveDecimalWritable value) { decimalValues[index].set(value); - isNull[longValues.length + doubleValues.length + byteValues.length + index] = false; } - public void assignNullDecimal(int index) { - isNull[longValues.length + doubleValues.length + byteValues.length + index] = true; + public void assignNullDecimal(int keyIndex, int index) { + isNull[keyIndex] = true; + decimalValues[index].set(HiveDecimal.ZERO); // assign 0 to simplify hashcode } public void assignTimestamp(int index, Timestamp value) { timestampValues[index] = value; - isNull[longValues.length + doubleValues.length + byteValues.length + - decimalValues.length + index] = false; } public void assignTimestamp(int index, TimestampColumnVector colVector, int elementNum) { colVector.timestampUpdate(timestampValues[index], elementNum); - isNull[longValues.length + doubleValues.length + byteValues.length + - decimalValues.length + index] = false; } - public void assignNullTimestamp(int index) { - isNull[longValues.length + doubleValues.length + byteValues.length + - decimalValues.length + index] = true; + public void assignNullTimestamp(int keyIndex, int index) { + isNull[keyIndex] = true; + timestampValues[index] = ZERO_TIMESTAMP; // assign 0 to simplify hashcode } public void assignIntervalDayTime(int index, HiveIntervalDayTime value) { intervalDayTimeValues[index].set(value); - isNull[longValues.length + doubleValues.length + byteValues.length + - decimalValues.length + timestampValues.length + index] = false; } public void assignIntervalDayTime(int index, IntervalDayTimeColumnVector colVector, int elementNum) { intervalDayTimeValues[index].set(colVector.asScratchIntervalDayTime(elementNum)); - isNull[longValues.length + doubleValues.length + byteValues.length + - decimalValues.length + timestampValues.length + index] = false; } - public void assignNullIntervalDayTime(int index) { - isNull[longValues.length + doubleValues.length + byteValues.length + - decimalValues.length + timestampValues.length + index] = true; + public void assignNullIntervalDayTime(int keyIndex, int index) { + isNull[keyIndex] = true; + intervalDayTimeValues[index] = ZERO_INTERVALDAYTIME; // assign 0 to simplify hashcode } @Override public String toString() { - return String.format("%d[%s] %d[%s] %d[%s] %d[%s] %d[%s] %d[%s]", - longValues.length, Arrays.toString(longValues), - doubleValues.length, Arrays.toString(doubleValues), - byteValues.length, Arrays.toString(byteValues), - decimalValues.length, Arrays.toString(decimalValues), - timestampValues.length, Arrays.toString(timestampValues), - intervalDayTimeValues.length, Arrays.toString(intervalDayTimeValues)); - } - - public boolean getIsLongNull(int i) { - return isNull[i]; - } + StringBuilder sb = new StringBuilder(); + boolean isFirst = true; + if (longValues.length > 0) { + isFirst = false; + sb.append("longs "); + sb.append(Arrays.toString(longValues)); + } + if (doubleValues.length > 0) { + if (isFirst) { + isFirst = false; + } else { + sb.append(", "); + } + sb.append("doubles "); + sb.append(Arrays.toString(doubleValues)); + } + if (byteValues.length > 0) { + if (isFirst) { + isFirst = false; + } else { + sb.append(", "); + } + sb.append("byte lengths "); + sb.append(Arrays.toString(byteLengths)); + } + if (decimalValues.length > 0) { + if (isFirst) { + isFirst = false; + } else { + sb.append(", "); + } + sb.append("decimals "); + sb.append(Arrays.toString(decimalValues)); + } + if (timestampValues.length > 0) { + if (isFirst) { + 
isFirst = false; + } else { + sb.append(", "); + } + sb.append("timestamps "); + sb.append(Arrays.toString(timestampValues)); + } + if (intervalDayTimeValues.length > 0) { + if (isFirst) { + isFirst = false; + } else { + sb.append(", "); + } + sb.append("interval day times "); + sb.append(Arrays.toString(intervalDayTimeValues)); + } - public boolean getIsDoubleNull(int i) { - return isNull[longValues.length + i]; - } + if (isFirst) { + isFirst = false; + } else { + sb.append(", "); + } + sb.append("nulls "); + sb.append(Arrays.toString(isNull)); - public boolean getIsBytesNull(int i) { - return isNull[longValues.length + doubleValues.length + i]; + return sb.toString(); } - public long getLongValue(int i) { return longValues[i]; } @@ -403,35 +442,29 @@ public int getVariableSize() { return variableSize; } - public boolean getIsDecimalNull(int i) { - return isNull[longValues.length + doubleValues.length + byteValues.length + i]; - } - public HiveDecimalWritable getDecimal(int i) { return decimalValues[i]; } - public boolean getIsTimestampNull(int i) { - return isNull[longValues.length + doubleValues.length + byteValues.length + - decimalValues.length + i]; - } - public Timestamp getTimestamp(int i) { return timestampValues[i]; } - public boolean getIsIntervalDayTimeNull(int i) { - return isNull[longValues.length + doubleValues.length + byteValues.length + - decimalValues.length + timestampValues.length + i]; - } - public HiveIntervalDayTime getIntervalDayTime(int i) { return intervalDayTimeValues[i]; } + public void clearIsNull() { + Arrays.fill(isNull, false); + } + + public boolean isNull(int keyIndex) { + return isNull[keyIndex]; + } + public static final class EmptyVectorHashKeyWrapper extends VectorHashKeyWrapper { private EmptyVectorHashKeyWrapper() { - super(0, 0, 0, 0, 0, 0); + super(0, 0, 0, 0, 0, 0, /* keyCount */ 0); // no need to override assigns - all assign ops will fail due to 0 size } @@ -451,4 +484,3 @@ public boolean equals(Object that) { } } } - diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java index f68228c..63cdf94 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java @@ -23,6 +23,9 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.util.JavaDataModel; import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; /** * Class for handling vectorized hash map key wrappers. It evaluates the key columns in a @@ -85,12 +88,168 @@ public int getKeysFixedSize() { * @throws HiveException */ public void evaluateBatch(VectorizedRowBatch batch) throws HiveException { - for(int i = 0; i < keyExpressions.length; ++i) { - keyExpressions[i].evaluate(batch); + + for(int i=0;i= 0) { - return kw.getIsLongNull(klh.longIndex) ? null : - keyOutputWriter.writeValue(kw.getLongValue(klh.longIndex)); - } else if (klh.doubleIndex >= 0) { - return kw.getIsDoubleNull(klh.doubleIndex) ? null : - keyOutputWriter.writeValue(kw.getDoubleValue(klh.doubleIndex)); - } else if (klh.stringIndex >= 0) { - return kw.getIsBytesNull(klh.stringIndex) ? 
null : - keyOutputWriter.writeValue( - kw.getBytes(klh.stringIndex), - kw.getByteStart(klh.stringIndex), - kw.getByteLength(klh.stringIndex)); - } else if (klh.decimalIndex >= 0) { - return kw.getIsDecimalNull(klh.decimalIndex)? null : - keyOutputWriter.writeValue( - kw.getDecimal(klh.decimalIndex)); - } else if (klh.timestampIndex >= 0) { - return kw.getIsTimestampNull(klh.timestampIndex)? null : - keyOutputWriter.writeValue( - kw.getTimestamp(klh.timestampIndex)); - } else if (klh.intervalDayTimeIndex >= 0) { - return kw.getIsIntervalDayTimeNull(klh.intervalDayTimeIndex)? null : - keyOutputWriter.writeValue( - kw.getIntervalDayTime(klh.intervalDayTimeIndex)); - } else { - throw new HiveException(String.format( - "Internal inconsistent KeyLookupHelper at index [%d]:%d %d %d %d %d %d", - i, klh.longIndex, klh.doubleIndex, klh.stringIndex, klh.decimalIndex, - klh.timestampIndex, klh.intervalDayTimeIndex)); + if (kw.isNull(keyIndex)) { + return null; + } + + ColumnVector.Type columnVectorType = columnVectorTypes[keyIndex]; + int columnTypeSpecificIndex = columnTypeSpecificIndices[keyIndex]; + + switch (columnVectorType) { + case LONG: + return keyOutputWriter.writeValue( + kw.getLongValue(columnTypeSpecificIndex)); + case DOUBLE: + return keyOutputWriter.writeValue( + kw.getDoubleValue(columnTypeSpecificIndex)); + case BYTES: + return keyOutputWriter.writeValue( + kw.getBytes(columnTypeSpecificIndex), + kw.getByteStart(columnTypeSpecificIndex), + kw.getByteLength(columnTypeSpecificIndex)); + case DECIMAL: + return keyOutputWriter.writeValue( + kw.getDecimal(columnTypeSpecificIndex)); + case TIMESTAMP: + return keyOutputWriter.writeValue( + kw.getTimestamp(columnTypeSpecificIndex)); + case INTERVAL_DAY_TIME: + return keyOutputWriter.writeValue( + kw.getIntervalDayTime(columnTypeSpecificIndex)); + default: + throw new HiveException("Unexpected column vector type " + columnVectorType); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java index 848fc8e..4e05fa3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java @@ -215,6 +215,9 @@ public void process(Object row, int tag) throws HiveException { } } + for (VectorExpression ve : keyExpressions) { + ve.evaluate(inBatch); + } keyWrapperBatch.evaluateBatch(inBatch); keyValues = keyWrapperBatch.getVectorHashKeyWrappers(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java index ac3363e..f8c4223 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java @@ -257,6 +257,9 @@ public void process(Object row, int tag) throws HiveException { } } + for (VectorExpression ve : keyExpressions) { + ve.evaluate(inBatch); + } keyWrapperBatch.evaluateBatch(inBatch); keyValues = keyWrapperBatch.getVectorHashKeyWrappers(); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 50eda15..4393de8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -1938,14 +1938,6 @@ private boolean 
validateFilterOperator(FilterOperator op) { private boolean validateGroupByOperator(GroupByOperator op, boolean isReduce, boolean isTezOrSpark) { GroupByDesc desc = op.getConf(); - if (desc.isGroupingSetsPresent()) { - setOperatorIssue("Grouping sets not supported"); - return false; - } - if (desc.pruneGroupingSetId()) { - setOperatorIssue("Pruning grouping set id not supported"); - return false; - } if (desc.getMode() != GroupByDesc.Mode.HASH && desc.isDistinct()) { setOperatorIssue("DISTINCT not supported"); return false; @@ -2055,6 +2047,11 @@ private boolean validateGroupByOperator(GroupByOperator op, boolean isReduce, bo ProcessingMode processingMode = VectorGroupByDesc.groupByDescModeToVectorProcessingMode(desc.getMode(), hasKeys); + if (desc.isGroupingSetsPresent() && + (processingMode != ProcessingMode.HASH && processingMode != ProcessingMode.STREAMING)) { + LOG.info("Vectorized GROUPING SETS only expected for HASH and STREAMING processing modes"); + return false; + } Pair retPair = validateAggregationDescs(desc.getAggregators(), processingMode, hasKeys); @@ -2259,6 +2256,12 @@ private boolean validateGenericUdf(ExprNodeGenericFuncDesc genericUDFExpr) { setExpressionIssue("Aggregation Function", "UDF " + udfName + " not supported"); return new Pair(false, false); } + /* + if (aggDesc.getDistinct()) { + setExpressionIssue("Aggregation Function", "DISTINCT not supported"); + return new Pair(false, false); + } + */ if (aggDesc.getParameters() != null && !validateExprNodeDesc(aggDesc.getParameters(), "Aggregation Function UDF " + udfName + " parameter")) { return new Pair(false, false); } diff --git ql/src/test/queries/clientpositive/groupby_grouping_id1.q ql/src/test/queries/clientpositive/groupby_grouping_id1.q index d43ea37..9948ce9 100644 --- ql/src/test/queries/clientpositive/groupby_grouping_id1.q +++ ql/src/test/queries/clientpositive/groupby_grouping_id1.q @@ -2,6 +2,8 @@ CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE; LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1; +-- SORT_QUERY_RESULTS + SELECT key, val, GROUPING__ID from T1 group by key, val with cube; SELECT key, val, GROUPING__ID from T1 group by cube(key, val); diff --git ql/src/test/queries/clientpositive/groupby_grouping_id2.q ql/src/test/queries/clientpositive/groupby_grouping_id2.q index 77a1638..cc7f9e4 100644 --- ql/src/test/queries/clientpositive/groupby_grouping_id2.q +++ ql/src/test/queries/clientpositive/groupby_grouping_id2.q @@ -1,3 +1,7 @@ +set hive.explain.user=false; +set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; + CREATE TABLE T1(key INT, value INT) STORED AS TEXTFILE; LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1; diff --git ql/src/test/queries/clientpositive/groupby_grouping_id3.q ql/src/test/queries/clientpositive/groupby_grouping_id3.q index c6746a8..955dbe0 100644 --- ql/src/test/queries/clientpositive/groupby_grouping_id3.q +++ ql/src/test/queries/clientpositive/groupby_grouping_id3.q @@ -6,6 +6,12 @@ set hive.cbo.enable = false; -- SORT_QUERY_RESULTS +EXPLAIN +SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1; SELECT key, value, GROUPING__ID, count(*) FROM T1 GROUP BY key, value @@ -14,6 +20,12 @@ HAVING GROUPING__ID = 1; set hive.cbo.enable = true; +EXPLAIN +SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1; SELECT key, value, GROUPING__ID, count(*) FROM T1 GROUP BY 
key, value diff --git ql/src/test/queries/clientpositive/groupby_grouping_sets1.q ql/src/test/queries/clientpositive/groupby_grouping_sets1.q index e239a87..c22c97f 100644 --- ql/src/test/queries/clientpositive/groupby_grouping_sets1.q +++ ql/src/test/queries/clientpositive/groupby_grouping_sets1.q @@ -1,19 +1,39 @@ +set hive.explain.user=false; +set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; + +-- SORT_QUERY_RESULTS + CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE; LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1; SELECT * FROM T1; +EXPLAIN +SELECT a, b, count(*) from T1 group by a, b with cube; SELECT a, b, count(*) from T1 group by a, b with cube; +EXPLAIN +SELECT a, b, count(*) from T1 group by cube(a, b); SELECT a, b, count(*) from T1 group by cube(a, b); +EXPLAIN +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()); SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()); +EXPLAIN +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)); SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)); +EXPLAIN +SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c); SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c); +EXPLAIN +SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)); SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)); +EXPLAIN +SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b); SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b); diff --git ql/src/test/queries/clientpositive/groupby_grouping_sets2.q ql/src/test/queries/clientpositive/groupby_grouping_sets2.q index b470964..90e6325 100644 --- ql/src/test/queries/clientpositive/groupby_grouping_sets2.q +++ ql/src/test/queries/clientpositive/groupby_grouping_sets2.q @@ -1,6 +1,10 @@ +set hive.explain.user=false; +set hive.cli.print.header=true; set hive.mapred.mode=nonstrict; set hive.new.job.grouping.set.cardinality=2; +-- SORT_QUERY_RESULTS + CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE; LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1; diff --git ql/src/test/queries/clientpositive/groupby_grouping_sets3.q ql/src/test/queries/clientpositive/groupby_grouping_sets3.q index 3c1a5e7..16421e8 100644 --- ql/src/test/queries/clientpositive/groupby_grouping_sets3.q +++ ql/src/test/queries/clientpositive/groupby_grouping_sets3.q @@ -1,3 +1,9 @@ +set hive.explain.user=false; +set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; + +-- SORT_QUERY_RESULTS + -- In this test, 2 files are loaded into table T1. The data contains rows with the same value of a and b, -- with different number of rows for a and b in each file. 
Since bucketizedHiveInputFormat is used, -- this tests that the aggregate function stores the partial aggregate state correctly even if an diff --git ql/src/test/queries/clientpositive/groupby_grouping_sets4.q ql/src/test/queries/clientpositive/groupby_grouping_sets4.q index 6e3201c..1074a3b 100644 --- ql/src/test/queries/clientpositive/groupby_grouping_sets4.q +++ ql/src/test/queries/clientpositive/groupby_grouping_sets4.q @@ -1,3 +1,4 @@ +set hive.explain.user=false; set hive.mapred.mode=nonstrict; set hive.merge.mapfiles = false; set hive.merge.mapredfiles = false; diff --git ql/src/test/queries/clientpositive/groupby_grouping_sets5.q ql/src/test/queries/clientpositive/groupby_grouping_sets5.q index c1c98b3..570d464 100644 --- ql/src/test/queries/clientpositive/groupby_grouping_sets5.q +++ ql/src/test/queries/clientpositive/groupby_grouping_sets5.q @@ -7,6 +7,8 @@ CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMIN LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1; +-- SORT_QUERY_RESULTS + -- This tests that cubes and rollups work fine where the source is a sub-query EXPLAIN SELECT a, b, count(*) FROM diff --git ql/src/test/queries/clientpositive/groupby_grouping_sets6.q ql/src/test/queries/clientpositive/groupby_grouping_sets6.q index 5cdb4a5..e537bce 100644 --- ql/src/test/queries/clientpositive/groupby_grouping_sets6.q +++ ql/src/test/queries/clientpositive/groupby_grouping_sets6.q @@ -3,6 +3,8 @@ CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMIN LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1; +-- SORT_QUERY_RESULTS + set hive.optimize.ppd = false; -- This filter is not pushed down diff --git ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q index 7856097..34759ca 100644 --- ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q +++ ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q @@ -1,3 +1,6 @@ + +-- SORT_QUERY_RESULTS + CREATE TABLE T1(key INT, value INT) STORED AS TEXTFILE; LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1; diff --git ql/src/test/queries/clientpositive/groupby_grouping_sets_limit.q ql/src/test/queries/clientpositive/groupby_grouping_sets_limit.q index db88d5f..b6c5143 100644 --- ql/src/test/queries/clientpositive/groupby_grouping_sets_limit.q +++ ql/src/test/queries/clientpositive/groupby_grouping_sets_limit.q @@ -1,3 +1,6 @@ + +-- SORT_QUERY_RESULTS + CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE; LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1; diff --git ql/src/test/queries/clientpositive/vector_groupby_cube1.q ql/src/test/queries/clientpositive/vector_groupby_cube1.q new file mode 100644 index 0000000..fd2f0de --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_cube1.q @@ -0,0 +1,55 @@ +set hive.mapred.mode=nonstrict; +set hive.map.aggr=true; +set hive.groupby.skewindata=false; + +-- SORT_QUERY_RESULTS + +CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1; + +EXPLAIN +SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube; +EXPLAIN +SELECT key, val, count(1) FROM T1 GROUP BY CUBE(key, val); + +SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube; + +EXPLAIN +SELECT key, val, GROUPING__ID, count(1) FROM T1 GROUP 
BY key, val with cube; + +SELECT key, val, GROUPING__ID, count(1) FROM T1 GROUP BY key, val with cube; + +EXPLAIN +SELECT key, count(distinct val) FROM T1 GROUP BY key with cube; + +SELECT key, count(distinct val) FROM T1 GROUP BY key with cube; + +set hive.groupby.skewindata=true; + +EXPLAIN +SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube; + +SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube; + +EXPLAIN +SELECT key, count(distinct val) FROM T1 GROUP BY key with cube; + +SELECT key, count(distinct val) FROM T1 GROUP BY key with cube; + + +set hive.multigroupby.singlereducer=true; + +CREATE TABLE T2(key1 STRING, key2 STRING, val INT) STORED AS TEXTFILE; +CREATE TABLE T3(key1 STRING, key2 STRING, val INT) STORED AS TEXTFILE; + +EXPLAIN +FROM T1 +INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with cube +INSERT OVERWRITE TABLE T3 SELECT key, val, sum(1) group by key, val with cube; + + +FROM T1 +INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with cube +INSERT OVERWRITE TABLE T3 SELECT key, val, sum(1) group by key, val with cube; + diff --git ql/src/test/queries/clientpositive/vector_groupby_grouping_id1.q ql/src/test/queries/clientpositive/vector_groupby_grouping_id1.q new file mode 100644 index 0000000..d957433 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_grouping_id1.q @@ -0,0 +1,23 @@ +set hive.explain.user=false; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; + +CREATE TABLE T1_text(key STRING, val STRING) STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1_text; + +CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text; + +-- SORT_QUERY_RESULTS + +SELECT key, val, GROUPING__ID from T1 group by key, val with cube; +SELECT key, val, GROUPING__ID from T1 group by cube(key, val); + +SELECT GROUPING__ID, key, val from T1 group by key, val with rollup; +SELECT GROUPING__ID, key, val from T1 group by rollup (key, val); + +SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN "0" WHEN GROUPING__ID == 1 THEN "1" WHEN GROUPING__ID == 2 THEN "2" WHEN GROUPING__ID == 3 THEN "3" ELSE "nothing" END from T1 group by key, val with cube; +SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN "0" WHEN GROUPING__ID == 1 THEN "1" WHEN GROUPING__ID == 2 THEN "2" WHEN GROUPING__ID == 3 THEN "3" ELSE "nothing" END from T1 group by cube(key, val); + diff --git ql/src/test/queries/clientpositive/vector_groupby_grouping_id2.q ql/src/test/queries/clientpositive/vector_groupby_grouping_id2.q new file mode 100644 index 0000000..c4f6722 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_grouping_id2.q @@ -0,0 +1,65 @@ +set hive.explain.user=false; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; + +CREATE TABLE T1_text(key INT, value INT) STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1_text; + +CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text; + +set hive.groupby.skewindata = true; + +-- SORT_QUERY_RESULTS + +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP; +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY ROLLUP (key, value); + +SELECT GROUPING__ID, count(*) +FROM +( +SELECT key, value, GROUPING__ID, count(*) from 
T1 GROUP BY key, value WITH ROLLUP +) t +GROUP BY GROUPING__ID; + +SELECT GROUPING__ID, count(*) +FROM +( +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY ROLLUP(key, value) +) t +GROUP BY GROUPING__ID; + + +SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY key,value WITH ROLLUP) t1 +JOIN +(SELECT GROUPING__ID FROM T1 GROUP BY key, value WITH ROLLUP) t2 +ON t1.GROUPING__ID = t2.GROUPING__ID; + +SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY ROLLUP(key,value)) t1 +JOIN +(SELECT GROUPING__ID FROM T1 GROUP BY ROLLUP(key, value)) t2 +ON t1.GROUPING__ID = t2.GROUPING__ID; + + + + + +set hive.groupby.skewindata = false; + +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP; + +SELECT GROUPING__ID, count(*) +FROM +( +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +) t +GROUP BY GROUPING__ID; + +SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY key,value WITH ROLLUP) t1 +JOIN +(SELECT GROUPING__ID FROM T1 GROUP BY key, value WITH ROLLUP) t2 +ON t1.GROUPING__ID = t2.GROUPING__ID; + + diff --git ql/src/test/queries/clientpositive/vector_groupby_grouping_id3.q ql/src/test/queries/clientpositive/vector_groupby_grouping_id3.q new file mode 100644 index 0000000..29e9211 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_grouping_id3.q @@ -0,0 +1,42 @@ +set hive.explain.user=false; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; + +CREATE TABLE T1_text(key INT, value INT) STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1_text; + +CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text; + +set hive.cbo.enable = false; + +-- SORT_QUERY_RESULTS + +EXPLAIN +SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1; +SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1; + +set hive.cbo.enable = true; + +EXPLAIN +SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1; +SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1; + diff --git ql/src/test/queries/clientpositive/vector_groupby_grouping_sets1.q ql/src/test/queries/clientpositive/vector_groupby_grouping_sets1.q new file mode 100644 index 0000000..4683c88 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_grouping_sets1.q @@ -0,0 +1,43 @@ +set hive.explain.user=false; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; + +-- SORT_QUERY_RESULTS + +CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text; + +CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text; + +SELECT * FROM T1; + +EXPLAIN +SELECT a, b, count(*) from T1 group by a, b with cube; +SELECT a, b, count(*) from T1 group by a, b with cube; +EXPLAIN +SELECT a, b, count(*) from T1 group by cube(a, b); +SELECT a, b, count(*) from T1 group by cube(a, b); + +EXPLAIN +SELECT a, b, count(*) FROM T1 
GROUP BY a, b GROUPING SETS (a, (a, b), b, ()); +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()); + +EXPLAIN +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)); +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)); + +EXPLAIN +SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c); +SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c); + +EXPLAIN +SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)); +SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)); + +EXPLAIN +SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b); +SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b); + diff --git ql/src/test/queries/clientpositive/vector_groupby_grouping_sets2.q ql/src/test/queries/clientpositive/vector_groupby_grouping_sets2.q new file mode 100644 index 0000000..158612c --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_grouping_sets2.q @@ -0,0 +1,36 @@ +set hive.explain.user=false; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; +set hive.mapred.mode=nonstrict; +set hive.new.job.grouping.set.cardinality=2; + +-- SORT_QUERY_RESULTS + +CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text; + +CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text; + +-- Since 4 grouping sets would be generated for the query below, an additional MR job should be created +EXPLAIN +SELECT a, b, count(*) from T1 group by a, b with cube; + +EXPLAIN +SELECT a, b, count(*) from T1 group by cube(a, b); +SELECT a, b, count(*) from T1 group by a, b with cube; + +EXPLAIN +SELECT a, b, sum(c) from T1 group by a, b with cube; +SELECT a, b, sum(c) from T1 group by a, b with cube; + +CREATE TABLE T2(a STRING, b STRING, c int, d int) STORED AS ORC; + +INSERT OVERWRITE TABLE T2 +SELECT a, b, c, c from T1; + +EXPLAIN +SELECT a, b, sum(c+d) from T2 group by a, b with cube; +SELECT a, b, sum(c+d) from T2 group by a, b with cube; diff --git ql/src/test/queries/clientpositive/vector_groupby_grouping_sets3.q ql/src/test/queries/clientpositive/vector_groupby_grouping_sets3.q new file mode 100644 index 0000000..b4c0d19 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_grouping_sets3.q @@ -0,0 +1,40 @@ +set hive.explain.user=false; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; + +-- SORT_QUERY_RESULTS + +-- In this test, 2 files are loaded into table T1. The data contains rows with the same value of a and b, +-- with different number of rows for a and b in each file. Since bucketizedHiveInputFormat is used, +-- this tests that the aggregate function stores the partial aggregate state correctly even if an +-- additional MR job is created for processing the grouping sets. 
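+-- Note: when vectorization is enabled, the map-side HASH group by processes each grouping set as a separate pass over the same input batch.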
+CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/grouping_sets1.txt' INTO TABLE T1_text; +LOAD DATA LOCAL INPATH '../../data/files/grouping_sets2.txt' INTO TABLE T1_text; + +CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text; + +set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; +set hive.new.job.grouping.set.cardinality = 30; + +-- The query below will execute in a single MR job, since 4 rows are generated per input row +-- (cube of a,b will lead to (a,b), (a, null), (null, b) and (null, null) and +-- hive.new.job.grouping.set.cardinality is more than 4. +EXPLAIN +SELECT a, b, avg(c), count(*) from T1 group by a, b with cube; + +EXPLAIN +SELECT a, b, avg(c), count(*) from T1 group by cube(a, b); +SELECT a, b, avg(c), count(*) from T1 group by a, b with cube; + +set hive.new.job.grouping.set.cardinality=2; + +-- The query below will execute in 2 MR jobs, since hive.new.job.grouping.set.cardinality is set to 2. +-- The partial aggregation state should be maintained correctly across MR jobs. +EXPLAIN +SELECT a, b, avg(c), count(*) from T1 group by a, b with cube; +SELECT a, b, avg(c), count(*) from T1 group by a, b with cube; + diff --git ql/src/test/queries/clientpositive/vector_groupby_grouping_sets4.q ql/src/test/queries/clientpositive/vector_groupby_grouping_sets4.q new file mode 100644 index 0000000..ef0d832 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_grouping_sets4.q @@ -0,0 +1,57 @@ +set hive.explain.user=false; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; +set hive.mapred.mode=nonstrict; +set hive.merge.mapfiles = false; +set hive.merge.mapredfiles = false; + +-- SORT_QUERY_RESULTS + +-- Set merging to false above to make the explain more readable + +CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text; + +CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text; + +-- This tests that cubes and rollups work fine inside sub-queries. 
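+-- Note: the vectorizer accepts grouping sets only for the HASH and STREAMING group by modes (see the Vectorizer.java change in this patch).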
+EXPLAIN +SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 +on subq1.a = subq2.a; + +EXPLAIN +SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by cube(a, b) ) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by cube(a, b) ) subq2 +on subq1.a = subq2.a; + +SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 +on subq1.a = subq2.a; + +set hive.new.job.grouping.set.cardinality=2; + +-- Since 4 grouping sets would be generated for each sub-query, an additional MR job should be created +-- for each of them +EXPLAIN +SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 +on subq1.a = subq2.a; + +SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 +on subq1.a = subq2.a; + diff --git ql/src/test/queries/clientpositive/vector_groupby_grouping_sets5.q ql/src/test/queries/clientpositive/vector_groupby_grouping_sets5.q new file mode 100644 index 0000000..15be3f3 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_grouping_sets5.q @@ -0,0 +1,39 @@ +set hive.explain.user=false; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; +set hive.mapred.mode=nonstrict; +set hive.merge.mapfiles = false; +set hive.merge.mapredfiles = false; +-- Set merging to false above to make the explain more readable + +CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text; + +CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text; + +-- SORT_QUERY_RESULTS + +-- This tests that cubes and rollups work fine where the source is a sub-query +EXPLAIN +SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube; + +EXPLAIN +SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by cube(a, b); + +SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube; + +set hive.new.job.grouping.set.cardinality=2; + +-- Since 4 grouping sets would be generated for the cube, an additional MR job should be created +EXPLAIN +SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube; + +SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube; diff --git ql/src/test/queries/clientpositive/vector_groupby_grouping_sets6.q ql/src/test/queries/clientpositive/vector_groupby_grouping_sets6.q new file mode 100644 index 0000000..72c2078 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_grouping_sets6.q @@ -0,0 +1,38 @@ +set hive.explain.user=false; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; +set hive.mapred.mode=nonstrict; + +CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE; + 
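+-- Informal summary of the expected result below (derived from the golden
+-- output, for readability): with GROUPING SETS ((a,b), a), the grouping set (a)
+-- keeps a intact, so the outer filter res.a=5 matches one row per grouping set
+-- whether or not the predicate is pushed below the aggregate:
+--   SELECT a, b FROM
+--   (SELECT a, b FROM T1 GROUP BY a, b GROUPING SETS ((a, b), a)) res
+--   WHERE res.a = 5;
+--   -- returns: 5 2  and  5 NULL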
+LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text; + +CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text; + +-- SORT_QUERY_RESULTS + +set hive.optimize.ppd = false; + +-- This filter is not pushed down +EXPLAIN +SELECT a, b FROM +(SELECT a, b from T1 group by a, b grouping sets ( (a,b),a )) res +WHERE res.a=5; + +SELECT a, b FROM +(SELECT a, b from T1 group by a, b grouping sets ( (a,b),a )) res +WHERE res.a=5; + +set hive.cbo.enable = true; + +-- This filter is pushed down through aggregate with grouping sets by Calcite +EXPLAIN +SELECT a, b FROM +(SELECT a, b from T1 group by a, b grouping sets ( (a,b),a )) res +WHERE res.a=5; + +SELECT a, b FROM +(SELECT a, b from T1 group by a, b grouping sets ( (a,b),a )) res +WHERE res.a=5; diff --git ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_grouping.q ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_grouping.q new file mode 100644 index 0000000..7b7c892 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_grouping.q @@ -0,0 +1,99 @@ +set hive.explain.user=false; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; + +CREATE TABLE T1_text(key INT, value INT) STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1_text; + +CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text; + +-- SORT_QUERY_RESULTS + +explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value); + +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value); + +explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value); + +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value); + +explain +select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1; + +select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1; + +explain +select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end; + +select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end; + +set hive.cbo.enable=false; + +explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value); + +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value); + +explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value); + +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value); + +explain +select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1; + +select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1; + +explain +select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end; + +select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case 
when x = 1 then key end; diff --git ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_limit.q ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_limit.q new file mode 100644 index 0000000..fff706b --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_limit.q @@ -0,0 +1,43 @@ +set hive.explain.user=false; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; + +CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text; + +CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text; + +-- SORT_QUERY_RESULTS + +EXPLAIN +SELECT a, b, count(*) from T1 group by a, b with cube LIMIT 10; + +SELECT a, b, count(*) from T1 group by a, b with cube LIMIT 10; + +EXPLAIN +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()) LIMIT 10; + +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()) LIMIT 10; + +EXPLAIN +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)) LIMIT 10; + +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)) LIMIT 10; + +EXPLAIN +SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) LIMIT 10; + +SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) LIMIT 10; + +EXPLAIN +SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)) LIMIT 10; + +SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)) LIMIT 10; + +EXPLAIN +SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b) LIMIT 10; + +SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b) LIMIT 10; diff --git ql/src/test/queries/clientpositive/vector_groupby_grouping_window.q ql/src/test/queries/clientpositive/vector_groupby_grouping_window.q new file mode 100644 index 0000000..c025c4f --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_grouping_window.q @@ -0,0 +1,21 @@ +set hive.explain.user=false; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; + +create table t(category int, live int, comments int) stored as orc; +insert into table t select key, 0, 2 from src tablesample(3 rows); + +explain +select category, max(live) live, max(comments) comments, rank() OVER (PARTITION BY category ORDER BY comments) rank1 +FROM t +GROUP BY category +GROUPING SETS ((), (category)) +HAVING max(comments) > 0; + +select category, max(live) live, max(comments) comments, rank() OVER (PARTITION BY category ORDER BY comments) rank1 +FROM t +GROUP BY category +GROUPING SETS ((), (category)) +HAVING max(comments) > 0; diff --git ql/src/test/queries/clientpositive/vector_groupby_rollup1.q ql/src/test/queries/clientpositive/vector_groupby_rollup1.q new file mode 100644 index 0000000..e08f8b9 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_rollup1.q @@ -0,0 +1,54 @@ +set hive.explain.user=false; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.fetch.task.conversion=none; +set hive.mapred.mode=nonstrict; +set hive.map.aggr=true; +set hive.groupby.skewindata=false; + +-- SORT_QUERY_RESULTS + +CREATE TABLE T1_text(key STRING, val STRING) STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1_text; + +CREATE TABLE T1 STORED 
AS ORC AS SELECT * FROM T1_text; + +EXPLAIN +SELECT key, val, count(1) FROM T1 GROUP BY key, val with rollup; + +SELECT key, val, count(1) FROM T1 GROUP BY key, val with rollup; + +EXPLAIN +SELECT key, count(distinct val) FROM T1 GROUP BY key with rollup; + +SELECT key, count(distinct val) FROM T1 GROUP BY key with rollup; + +set hive.groupby.skewindata=true; + +EXPLAIN +SELECT key, val, count(1) FROM T1 GROUP BY key, val with rollup; + +SELECT key, val, count(1) FROM T1 GROUP BY key, val with rollup; + +EXPLAIN +SELECT key, count(distinct val) FROM T1 GROUP BY key with rollup; + +SELECT key, count(distinct val) FROM T1 GROUP BY key with rollup; + + +set hive.multigroupby.singlereducer=true; + +CREATE TABLE T2(key1 STRING, key2 STRING, val INT) STORED AS ORC; +CREATE TABLE T3(key1 STRING, key2 STRING, val INT) STORED AS ORC; + +EXPLAIN +FROM T1 +INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with rollup +INSERT OVERWRITE TABLE T3 SELECT key, val, sum(1) group by rollup(key, val); + + +FROM T1 +INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with rollup +INSERT OVERWRITE TABLE T3 SELECT key, val, sum(1) group by key, val with rollup; + diff --git ql/src/test/results/clientpositive/groupby_grouping_id1.q.out ql/src/test/results/clientpositive/groupby_grouping_id1.q.out index c2a0393..8b203e7 100644 --- ql/src/test/results/clientpositive/groupby_grouping_id1.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_id1.q.out @@ -22,24 +22,24 @@ POSTHOOK: query: SELECT key, val, GROUPING__ID from T1 group by key, val with cu POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 3 -NULL 11 2 -NULL 12 2 -NULL 13 2 -NULL 17 2 -NULL 18 2 -NULL 28 2 -1 NULL 1 1 11 0 -2 NULL 1 +1 NULL 1 2 12 0 -3 NULL 1 +2 NULL 1 3 13 0 -7 NULL 1 +3 NULL 1 7 17 0 -8 NULL 1 +7 NULL 1 8 18 0 8 28 0 +8 NULL 1 +NULL 11 2 +NULL 12 2 +NULL 13 2 +NULL 17 2 +NULL 18 2 +NULL 28 2 +NULL NULL 3 PREHOOK: query: SELECT key, val, GROUPING__ID from T1 group by cube(key, val) PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -48,24 +48,24 @@ POSTHOOK: query: SELECT key, val, GROUPING__ID from T1 group by cube(key, val) POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 3 -NULL 11 2 -NULL 12 2 -NULL 13 2 -NULL 17 2 -NULL 18 2 -NULL 28 2 -1 NULL 1 1 11 0 -2 NULL 1 +1 NULL 1 2 12 0 -3 NULL 1 +2 NULL 1 3 13 0 -7 NULL 1 +3 NULL 1 7 17 0 -8 NULL 1 +7 NULL 1 8 18 0 8 28 0 +8 NULL 1 +NULL 11 2 +NULL 12 2 +NULL 13 2 +NULL 17 2 +NULL 18 2 +NULL 28 2 +NULL NULL 3 PREHOOK: query: SELECT GROUPING__ID, key, val from T1 group by key, val with rollup PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -74,18 +74,18 @@ POSTHOOK: query: SELECT GROUPING__ID, key, val from T1 group by key, val with ro POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -3 NULL NULL -1 1 NULL 0 1 11 -1 2 NULL 0 2 12 -1 3 NULL 0 3 13 -1 7 NULL 0 7 17 -1 8 NULL 0 8 18 0 8 28 +1 1 NULL +1 2 NULL +1 3 NULL +1 7 NULL +1 8 NULL +3 NULL NULL PREHOOK: query: SELECT GROUPING__ID, key, val from T1 group by rollup (key, val) PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -94,18 +94,18 @@ POSTHOOK: query: SELECT GROUPING__ID, key, val from T1 group by rollup (key, val POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -3 NULL NULL -1 1 NULL 0 1 11 -1 2 NULL 0 2 12 -1 3 NULL 0 3 13 -1 7 NULL 0 7 17 -1 8 NULL 0 8 18 0 8 28 +1 1 NULL +1 2 NULL +1 3 NULL +1 7 NULL +1 8 NULL +3 
NULL NULL PREHOOK: query: SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN "0" WHEN GROUPING__ID == 1 THEN "1" WHEN GROUPING__ID == 2 THEN "2" WHEN GROUPING__ID == 3 THEN "3" ELSE "nothing" END from T1 group by key, val with cube PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -114,24 +114,24 @@ POSTHOOK: query: SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 3 3 -NULL 11 2 2 -NULL 12 2 2 -NULL 13 2 2 -NULL 17 2 2 -NULL 18 2 2 -NULL 28 2 2 -1 NULL 1 1 1 11 0 0 -2 NULL 1 1 +1 NULL 1 1 2 12 0 0 -3 NULL 1 1 +2 NULL 1 1 3 13 0 0 -7 NULL 1 1 +3 NULL 1 1 7 17 0 0 -8 NULL 1 1 +7 NULL 1 1 8 18 0 0 8 28 0 0 +8 NULL 1 1 +NULL 11 2 2 +NULL 12 2 2 +NULL 13 2 2 +NULL 17 2 2 +NULL 18 2 2 +NULL 28 2 2 +NULL NULL 3 3 PREHOOK: query: SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN "0" WHEN GROUPING__ID == 1 THEN "1" WHEN GROUPING__ID == 2 THEN "2" WHEN GROUPING__ID == 3 THEN "3" ELSE "nothing" END from T1 group by cube(key, val) PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -140,21 +140,21 @@ POSTHOOK: query: SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 3 3 -NULL 11 2 2 -NULL 12 2 2 -NULL 13 2 2 -NULL 17 2 2 -NULL 18 2 2 -NULL 28 2 2 -1 NULL 1 1 1 11 0 0 -2 NULL 1 1 +1 NULL 1 1 2 12 0 0 -3 NULL 1 1 +2 NULL 1 1 3 13 0 0 -7 NULL 1 1 +3 NULL 1 1 7 17 0 0 -8 NULL 1 1 +7 NULL 1 1 8 18 0 0 8 28 0 0 +8 NULL 1 1 +NULL 11 2 2 +NULL 12 2 2 +NULL 13 2 2 +NULL 17 2 2 +NULL 18 2 2 +NULL 28 2 2 +NULL NULL 3 3 diff --git ql/src/test/results/clientpositive/groupby_grouping_id3.q.out ql/src/test/results/clientpositive/groupby_grouping_id3.q.out index 915e6e1..6670d9c 100644 --- ql/src/test/results/clientpositive/groupby_grouping_id3.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_id3.q.out @@ -14,6 +14,76 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@t1 +PREHOOK: query: EXPLAIN +SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: key (type: int), value (type: int), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col2 = 1) (type: boolean) + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), 1 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), 1 (type: int) + Statistics: Num rows: 3 Data size: 
30 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int), 1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: int), _col1 (type: int), 1 (type: int), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT key, value, GROUPING__ID, count(*) FROM T1 GROUP BY key, value @@ -34,6 +104,75 @@ POSTHOOK: Input: default@t1 2 NULL 1 1 3 NULL 1 2 4 NULL 1 1 +PREHOOK: query: EXPLAIN +SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: int), _col1 (type: int), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col2 = 1) (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), 1 (type: int), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT key, value, GROUPING__ID, count(*) FROM T1 GROUP BY key, value diff --git ql/src/test/results/clientpositive/groupby_grouping_sets1.q.out ql/src/test/results/clientpositive/groupby_grouping_sets1.q.out index aebba0d..13eac20 100644 --- ql/src/test/results/clientpositive/groupby_grouping_sets1.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_sets1.q.out @@ -22,12 +22,73 @@ POSTHOOK: query: SELECT * FROM T1 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -8 1 1 -5 2 2 +t1.a t1.b t1.c 1 1 3 2 2 4 2 3 5 3 2 8 +5 2 2 +8 1 1 +PREHOOK: query: EXPLAIN +SELECT a, b, count(*) from T1 group by a, b with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, count(*) from T1 group by a, b with cube +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string) + outputColumnNames: a, b + Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 144 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 4 Data size: 144 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 2 Data size: 72 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 72 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 72 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT a, b, count(*) from T1 group by a, b with cube PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -36,21 +97,82 @@ POSTHOOK: query: SELECT a, b, count(*) from T1 group by a, b with cube POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 6 -NULL 1 2 -NULL 2 3 -NULL 3 1 -1 NULL 1 +a b _c2 1 1 1 -2 NULL 2 +1 NULL 1 2 2 1 2 3 1 -3 NULL 1 +2 NULL 2 3 2 1 -5 NULL 1 +3 NULL 1 5 2 1 -8 NULL 1 +5 NULL 1 8 1 1 +8 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 +PREHOOK: query: EXPLAIN +SELECT a, b, count(*) from T1 group by cube(a, b) +PREHOOK: type: QUERY 
+POSTHOOK: query: EXPLAIN +SELECT a, b, count(*) from T1 group by cube(a, b) +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string) + outputColumnNames: a, b + Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 144 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 4 Data size: 144 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 2 Data size: 72 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 72 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 72 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT a, b, count(*) from T1 group by cube(a, b) PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -59,21 +181,82 @@ POSTHOOK: query: SELECT a, b, count(*) from T1 group by cube(a, b) POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 6 -NULL 1 2 -NULL 2 3 -NULL 3 1 -1 NULL 1 +a b _c2 1 1 1 -2 NULL 2 +1 NULL 1 2 2 1 2 3 1 -3 NULL 1 +2 NULL 2 3 2 1 -5 NULL 1 +3 NULL 1 5 2 1 -8 NULL 1 +5 NULL 1 8 1 1 +8 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 +PREHOOK: query: EXPLAIN +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()) +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string) + outputColumnNames: a, b + Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 144 Basic stats: COMPLETE Column stats: NONE + Reduce 
Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 4 Data size: 144 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 2 Data size: 72 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 72 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 72 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()) PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -82,21 +265,82 @@ POSTHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 6 -NULL 1 2 -NULL 2 3 -NULL 3 1 -1 NULL 1 +a b _c2 1 1 1 -2 NULL 2 +1 NULL 1 2 2 1 2 3 1 -3 NULL 1 +2 NULL 2 3 2 1 -5 NULL 1 +3 NULL 1 5 2 1 -8 NULL 1 +5 NULL 1 8 1 1 +8 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 +PREHOOK: query: EXPLAIN +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)) +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string) + outputColumnNames: a, b + Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 72 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 2 Data size: 72 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + 
outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)) PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -105,17 +349,75 @@ POSTHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, ( POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -1 NULL 1 +a b _c2 1 1 1 -2 NULL 2 +1 NULL 1 2 2 1 2 3 1 -3 NULL 1 +2 NULL 2 3 2 1 -5 NULL 1 +3 NULL 1 5 2 1 -8 NULL 1 +5 NULL 1 8 1 1 +8 NULL 1 +PREHOOK: query: EXPLAIN +SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string), c (type: string) + outputColumnNames: a, b, c + Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: a (type: string), b (type: string), c (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 108 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int) + sort order: ++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int) + Statistics: Num rows: 3 Data size: 108 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -124,6 +426,12 @@ POSTHOOK: query: SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### +a +1 +2 +3 +5 +8 NULL NULL NULL @@ -133,11 +441,59 @@ NULL NULL NULL NULL -1 -2 -3 -5 -8 
+PREHOOK: query: EXPLAIN +SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)) +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string) + outputColumnNames: a + Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: a (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)) PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -146,11 +502,72 @@ POSTHOOK: query: SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)) POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### +a 1 2 3 5 8 +PREHOOK: query: EXPLAIN +SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b) +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (UDFToDouble(a) + UDFToDouble(b)) (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: double), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: double), _col1 (type: int) + Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: double), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select 
Operator + expressions: _col0 (type: double), _col2 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b) PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -159,6 +576,7 @@ POSTHOOK: query: SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### +_c0 _c1 2.0 1 4.0 1 5.0 2 diff --git ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out index b4f8ce7..93c007b 100644 --- ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out @@ -20,6 +20,7 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT a, b, count(*) from T1 group by a, b with cube POSTHOOK: type: QUERY +Explain STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -104,6 +105,7 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT a, b, count(*) from T1 group by cube(a, b) POSTHOOK: type: QUERY +Explain STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -190,27 +192,29 @@ POSTHOOK: query: SELECT a, b, count(*) from T1 group by a, b with cube POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 6 -NULL 1 2 -NULL 2 3 -NULL 3 1 -1 NULL 1 +a b _c2 1 1 1 -2 NULL 2 +1 NULL 1 2 2 1 2 3 1 -3 NULL 1 +2 NULL 2 3 2 1 -5 NULL 1 +3 NULL 1 5 2 1 -8 NULL 1 +5 NULL 1 8 1 1 +8 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 PREHOOK: query: EXPLAIN SELECT a, b, sum(c) from T1 group by a, b with cube PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT a, b, sum(c) from T1 group by a, b with cube POSTHOOK: type: QUERY +Explain STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -297,21 +301,22 @@ POSTHOOK: query: SELECT a, b, sum(c) from T1 group by a, b with cube POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 23.0 -NULL 1 4.0 -NULL 2 14.0 -NULL 3 5.0 -1 NULL 3.0 +a b _c2 1 1 3.0 -2 NULL 9.0 +1 NULL 3.0 2 2 4.0 2 3 5.0 -3 NULL 8.0 +2 NULL 9.0 3 2 8.0 -5 NULL 2.0 +3 NULL 8.0 5 2 2.0 -8 NULL 1.0 +5 NULL 2.0 8 1 1.0 +8 NULL 1.0 +NULL 1 4.0 +NULL 2 14.0 +NULL 3 5.0 +NULL NULL 23.0 PREHOOK: query: CREATE TABLE T2(a STRING, b STRING, c int, d int) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default @@ -334,12 +339,14 @@ POSTHOOK: Lineage: t2.a SIMPLE [(t1)t1.FieldSchema(name:a, type:string, comment: POSTHOOK: Lineage: t2.b SIMPLE [(t1)t1.FieldSchema(name:b, type:string, comment:null), ] POSTHOOK: Lineage: t2.c EXPRESSION [(t1)t1.FieldSchema(name:c, type:string, comment:null), ] POSTHOOK: Lineage: t2.d EXPRESSION [(t1)t1.FieldSchema(name:c, type:string, comment:null), ] +_col0 _col1 _col2 _col3 PREHOOK: query: EXPLAIN SELECT a, b, sum(c+d) from T2 group by a, b with cube PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT a, b, sum(c+d) from T2 group by a, b with 
cube POSTHOOK: type: QUERY +Explain STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -426,18 +433,19 @@ POSTHOOK: query: SELECT a, b, sum(c+d) from T2 group by a, b with cube POSTHOOK: type: QUERY POSTHOOK: Input: default@t2 #### A masked pattern was here #### -NULL NULL 46 -NULL 1 8 -NULL 2 28 -NULL 3 10 -1 NULL 6 +a b _c2 1 1 6 -2 NULL 18 +1 NULL 6 2 2 8 2 3 10 -3 NULL 16 +2 NULL 18 3 2 16 -5 NULL 4 +3 NULL 16 5 2 4 -8 NULL 2 +5 NULL 4 8 1 2 +8 NULL 2 +NULL 1 8 +NULL 2 28 +NULL 3 10 +NULL NULL 46 diff --git ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out index 67cbdcd..f9e44fd 100644 --- ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out @@ -28,6 +28,7 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT a, b, avg(c), count(*) from T1 group by a, b with cube POSTHOOK: type: QUERY +Explain STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -87,6 +88,7 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT a, b, avg(c), count(*) from T1 group by cube(a, b) POSTHOOK: type: QUERY +Explain STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -148,28 +150,30 @@ POSTHOOK: query: SELECT a, b, avg(c), count(*) from T1 group by a, b with cube POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 3.8333333333333335 12 -NULL 1 2.0 5 -NULL 2 5.2 5 -NULL 3 5.0 2 -1 NULL 2.6666666666666665 3 +a b _c2 _c3 1 1 3.0 2 1 2 2.0 1 -2 NULL 5.2 5 +1 NULL 2.6666666666666665 3 2 2 5.333333333333333 3 2 3 5.0 2 -3 NULL 8.0 1 +2 NULL 5.2 5 3 2 8.0 1 -5 NULL 2.0 1 +3 NULL 8.0 1 5 1 2.0 1 -8 NULL 1.0 2 +5 NULL 2.0 1 8 1 1.0 2 +8 NULL 1.0 2 +NULL 1 2.0 5 +NULL 2 5.2 5 +NULL 3 5.0 2 +NULL NULL 3.8333333333333335 12 PREHOOK: query: EXPLAIN SELECT a, b, avg(c), count(*) from T1 group by a, b with cube PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT a, b, avg(c), count(*) from T1 group by a, b with cube POSTHOOK: type: QUERY +Explain STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -256,19 +260,20 @@ POSTHOOK: query: SELECT a, b, avg(c), count(*) from T1 group by a, b with cube POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 3.8333333333333335 12 -NULL 1 2.0 5 -NULL 2 5.2 5 -NULL 3 5.0 2 -1 NULL 2.6666666666666665 3 +a b _c2 _c3 1 1 3.0 2 1 2 2.0 1 -2 NULL 5.2 5 +1 NULL 2.6666666666666665 3 2 2 5.333333333333333 3 2 3 5.0 2 -3 NULL 8.0 1 +2 NULL 5.2 5 3 2 8.0 1 -5 NULL 2.0 1 +3 NULL 8.0 1 5 1 2.0 1 -8 NULL 1.0 2 +5 NULL 2.0 1 8 1 1.0 2 +8 NULL 1.0 2 +NULL 1 2.0 5 +NULL 2 5.2 5 +NULL 3 5.0 2 +NULL NULL 3.8333333333333335 12 diff --git ql/src/test/results/clientpositive/groupby_grouping_sets5.q.out ql/src/test/results/clientpositive/groupby_grouping_sets5.q.out index 166f110..46f696d 100644 --- ql/src/test/results/clientpositive/groupby_grouping_sets5.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_sets5.q.out @@ -202,21 +202,21 @@ POSTHOOK: query: SELECT a, b, count(*) FROM POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 6 -NULL 1 2 -NULL 2 3 -NULL 3 1 -1 NULL 1 1 1 1 -2 NULL 2 +1 NULL 1 2 2 1 2 3 1 -3 NULL 1 +2 NULL 2 3 2 1 -5 NULL 1 +3 NULL 1 5 2 1 -8 NULL 1 +5 NULL 1 8 1 1 +8 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 PREHOOK: query: EXPLAIN SELECT a, b, count(*) FROM 
(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube @@ -341,18 +341,18 @@ POSTHOOK: query: SELECT a, b, count(*) FROM POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 6 -NULL 1 2 -NULL 2 3 -NULL 3 1 -1 NULL 1 1 1 1 -2 NULL 2 +1 NULL 1 2 2 1 2 3 1 -3 NULL 1 +2 NULL 2 3 2 1 -5 NULL 1 +3 NULL 1 5 2 1 -8 NULL 1 +5 NULL 1 8 1 1 +8 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 diff --git ql/src/test/results/clientpositive/groupby_grouping_sets6.q.out ql/src/test/results/clientpositive/groupby_grouping_sets6.q.out index 16f0871..b174e89 100644 --- ql/src/test/results/clientpositive/groupby_grouping_sets6.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_sets6.q.out @@ -81,8 +81,8 @@ WHERE res.a=5 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -5 NULL 5 2 +5 NULL PREHOOK: query: EXPLAIN SELECT a, b FROM (SELECT a, b from T1 group by a, b grouping sets ( (a,b),a )) res @@ -150,5 +150,5 @@ WHERE res.a=5 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -5 NULL 5 2 +5 NULL diff --git ql/src/test/results/clientpositive/groupby_grouping_sets_grouping.q.out ql/src/test/results/clientpositive/groupby_grouping_sets_grouping.q.out index 7faf278..b82d9c2 100644 --- ql/src/test/results/clientpositive/groupby_grouping_sets_grouping.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_sets_grouping.q.out @@ -85,17 +85,17 @@ group by rollup(key, value) POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 3 1 1 +1 1 0 0 0 1 NULL 0 0 0 1 NULL 1 0 1 -1 1 0 0 0 -2 NULL 1 0 1 2 2 0 0 0 +2 NULL 1 0 1 +3 3 0 0 0 3 NULL 0 0 0 3 NULL 1 0 1 -3 3 0 0 0 -4 NULL 1 0 1 4 5 0 0 0 +4 NULL 1 0 1 +NULL NULL 3 1 1 PREHOOK: query: explain select key, value, `grouping__id`, grouping(key), grouping(value) from T1 @@ -167,22 +167,22 @@ group by cube(key, value) POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 2 1 0 -NULL NULL 3 1 1 -NULL 1 2 1 0 -NULL 2 2 1 0 -NULL 3 2 1 0 -NULL 5 2 1 0 +1 1 0 0 0 1 NULL 0 0 0 1 NULL 1 0 1 -1 1 0 0 0 -2 NULL 1 0 1 2 2 0 0 0 +2 NULL 1 0 1 +3 3 0 0 0 3 NULL 0 0 0 3 NULL 1 0 1 -3 3 0 0 0 -4 NULL 1 0 1 4 5 0 0 0 +4 NULL 1 0 1 +NULL 1 2 1 0 +NULL 2 2 1 0 +NULL 3 2 1 0 +NULL 5 2 1 0 +NULL NULL 2 1 0 +NULL NULL 3 1 1 PREHOOK: query: explain select key, value from T1 @@ -261,12 +261,12 @@ having grouping(key) = 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL -NULL NULL NULL 1 NULL 2 NULL 3 NULL 5 +NULL NULL +NULL NULL PREHOOK: query: explain select key, value, grouping(key)+grouping(value) as x from T1 @@ -371,16 +371,16 @@ order by x desc, case when x = 1 then key end POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 2 -NULL 5 1 -NULL 3 1 -NULL 2 1 -NULL 1 1 -NULL NULL 1 1 NULL 1 2 NULL 1 3 NULL 1 4 NULL 1 +NULL 1 1 +NULL 2 1 +NULL 3 1 +NULL 5 1 +NULL NULL 1 +NULL NULL 2 PREHOOK: query: explain select key, value, `grouping__id`, grouping(key), grouping(value) from T1 @@ -452,17 +452,17 @@ group by rollup(key, value) POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 3 1 1 +1 1 0 0 0 1 NULL 0 0 0 1 NULL 1 0 1 -1 1 0 0 0 -2 NULL 1 0 1 2 2 0 0 0 +2 NULL 1 0 1 +3 3 0 0 0 3 NULL 0 0 0 3 NULL 1 0 1 -3 3 0 0 0 -4 NULL 1 0 1 4 5 0 0 0 +4 NULL 1 0 1 +NULL NULL 3 1 1 PREHOOK: query: explain select key, value, 
`grouping__id`, grouping(key), grouping(value) from T1 @@ -534,22 +534,22 @@ group by cube(key, value) POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 2 1 0 -NULL NULL 3 1 1 -NULL 1 2 1 0 -NULL 2 2 1 0 -NULL 3 2 1 0 -NULL 5 2 1 0 +1 1 0 0 0 1 NULL 0 0 0 1 NULL 1 0 1 -1 1 0 0 0 -2 NULL 1 0 1 2 2 0 0 0 +2 NULL 1 0 1 +3 3 0 0 0 3 NULL 0 0 0 3 NULL 1 0 1 -3 3 0 0 0 -4 NULL 1 0 1 4 5 0 0 0 +4 NULL 1 0 1 +NULL 1 2 1 0 +NULL 2 2 1 0 +NULL 3 2 1 0 +NULL 5 2 1 0 +NULL NULL 2 1 0 +NULL NULL 3 1 1 PREHOOK: query: explain select key, value from T1 @@ -625,12 +625,12 @@ having grouping(key) = 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL -NULL NULL NULL 1 NULL 2 NULL 3 NULL 5 +NULL NULL +NULL NULL PREHOOK: query: explain select key, value, grouping(key)+grouping(value) as x from T1 @@ -735,16 +735,16 @@ order by x desc, case when x = 1 then key end POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 2 -NULL 5 1 -NULL 3 1 -NULL 2 1 -NULL 1 1 -NULL NULL 1 1 NULL 1 2 NULL 1 3 NULL 1 4 NULL 1 +NULL 1 1 +NULL 2 1 +NULL 3 1 +NULL 5 1 +NULL NULL 1 +NULL NULL 2 PREHOOK: query: explain select key, value, grouping(key), grouping(value) from T1 @@ -816,11 +816,11 @@ group by key, value POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -1 NULL 0 0 1 1 0 0 +1 NULL 0 0 2 2 0 0 -3 NULL 0 0 3 3 0 0 +3 NULL 0 0 4 5 0 0 PREHOOK: query: explain select key, value, grouping(value) @@ -893,11 +893,11 @@ group by key, value POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -1 NULL 0 1 1 0 +1 NULL 0 2 2 0 -3 NULL 0 3 3 0 +3 NULL 0 4 5 0 PREHOOK: query: explain select key, value @@ -970,9 +970,9 @@ having grouping(key) = 0 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -1 NULL 1 1 +1 NULL 2 2 -3 NULL 3 3 +3 NULL 4 5 diff --git ql/src/test/results/clientpositive/groupby_grouping_sets_limit.q.out ql/src/test/results/clientpositive/groupby_grouping_sets_limit.q.out index e2d9d96..02636eb 100644 --- ql/src/test/results/clientpositive/groupby_grouping_sets_limit.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_sets_limit.q.out @@ -85,16 +85,16 @@ POSTHOOK: query: SELECT a, b, count(*) from T1 group by a, b with cube LIMIT 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 6 -NULL 1 2 -NULL 2 3 -NULL 3 1 -1 NULL 1 1 1 1 -2 NULL 2 +1 NULL 1 2 2 1 2 3 1 +2 NULL 2 3 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 PREHOOK: query: EXPLAIN SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()) LIMIT 10 PREHOOK: type: QUERY @@ -166,16 +166,16 @@ POSTHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 6 -NULL 1 2 -NULL 2 3 -NULL 3 1 -1 NULL 1 1 1 1 -2 NULL 2 +1 NULL 1 2 2 1 2 3 1 +2 NULL 2 3 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 PREHOOK: query: EXPLAIN SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)) LIMIT 10 PREHOOK: type: QUERY @@ -247,15 +247,15 @@ POSTHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, ( POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -1 NULL 1 1 1 1 -2 NULL 2 +1 NULL 1 2 2 1 2 3 1 -3 NULL 1 +2 NULL 2 3 2 1 -5 NULL 1 +3 NULL 1 5 2 1 +5 NULL 1 8 NULL 1 PREHOOK: query: 
EXPLAIN SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) LIMIT 10 @@ -325,6 +325,7 @@ POSTHOOK: query: SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) LIMIT POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### +1 NULL NULL NULL @@ -334,7 +335,6 @@ NULL NULL NULL NULL -1 PREHOOK: query: EXPLAIN SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)) LIMIT 10 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/llap/groupby_grouping_id2.q.out ql/src/test/results/clientpositive/llap/groupby_grouping_id2.q.out index be5db9c..d7f83d5 100644 --- ql/src/test/results/clientpositive/llap/groupby_grouping_id2.q.out +++ ql/src/test/results/clientpositive/llap/groupby_grouping_id2.q.out @@ -22,6 +22,7 @@ POSTHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### +key value grouping__id _c3 1 1 0 1 1 NULL 0 1 1 NULL 1 2 @@ -41,6 +42,7 @@ POSTHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY ROLL POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### +key value grouping__id _c3 1 1 0 1 1 NULL 0 1 1 NULL 1 2 @@ -70,6 +72,7 @@ GROUP BY GROUPING__ID POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### +grouping__id _c1 0 6 1 4 3 1 @@ -91,6 +94,7 @@ GROUP BY GROUPING__ID POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### +grouping__id _c1 0 6 1 4 3 1 @@ -108,6 +112,7 @@ ON t1.GROUPING__ID = t2.GROUPING__ID POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### +t1.grouping__id t2.grouping__id 0 0 0 0 0 0 @@ -175,6 +180,7 @@ ON t1.GROUPING__ID = t2.GROUPING__ID POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### +t1.grouping__id t2.grouping__id 0 0 0 0 0 0 @@ -236,6 +242,7 @@ POSTHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### +key value grouping__id _c3 1 1 0 1 1 NULL 0 1 1 NULL 1 2 @@ -265,6 +272,7 @@ GROUP BY GROUPING__ID POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### +grouping__id _c1 0 6 1 4 3 1 @@ -282,6 +290,7 @@ ON t1.GROUPING__ID = t2.GROUPING__ID POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### +t1.grouping__id t2.grouping__id 0 0 0 0 0 0 diff --git ql/src/test/results/clientpositive/llap/vector_count.q.out ql/src/test/results/clientpositive/llap/vector_count.q.out index 1bda3d3..88b1161 100644 --- ql/src/test/results/clientpositive/llap/vector_count.q.out +++ ql/src/test/results/clientpositive/llap/vector_count.q.out @@ -68,26 +68,12 @@ STAGE PLANS: TableScan alias: abcd Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: a (type: int), b (type: int), c (type: int), d (type: int) outputColumnNames: a, b, c, d - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT b), count(DISTINCT c), sum(d) - Group By Vectorization: - aggregators: VectorUDAFCount(col 1) -> bigint, VectorUDAFCount(col 2) -> bigint, VectorUDAFSumLong(col 3) -> bigint - 
className: VectorGroupByOperator - vectorOutput: true - keyExpressions: col 0, col 1, col 2 - native: false - projectedOutputColumns: [0, 1, 2] keys: a (type: int), b (type: int), c (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 @@ -96,23 +82,16 @@ STAGE PLANS: key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: Uniform Hash IS false, No DISTINCT columns IS false Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: bigint) - Execution mode: vectorized, llap + Execution mode: llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported + vectorized: false Reducer 2 Execution mode: llap Reduce Vectorization: @@ -178,26 +157,12 @@ STAGE PLANS: TableScan alias: abcd Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: a (type: int), b (type: int), c (type: int), d (type: int) outputColumnNames: _col1, _col2, _col3, _col4 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1), count(), count(_col1), count(_col2), count(_col3), count(_col4), count(DISTINCT _col1), count(DISTINCT _col2), count(DISTINCT _col3), count(DISTINCT _col4), count(DISTINCT _col1, _col2), count(DISTINCT _col2, _col3), count(DISTINCT _col3, _col4), count(DISTINCT _col1, _col4), count(DISTINCT _col1, _col3), count(DISTINCT _col2, _col4), count(DISTINCT _col1, _col2, _col3), count(DISTINCT _col2, _col3, _col4), count(DISTINCT _col1, _col3, _col4), count(DISTINCT _col1, _col2, _col4), count(DISTINCT _col1, _col2, _col3, _col4) - Group By Vectorization: - aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 4:long) -> bigint, VectorUDAFCountStar(*) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 1) -> bigint, VectorUDAFCount(col 2) -> bigint, VectorUDAFCount(col 3) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 1) -> bigint, VectorUDAFCount(col 2) -> bigint, VectorUDAFCount(col 3) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 1) -> bigint, VectorUDAFCount(col 2) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 1) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 1) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 0) -> bigint - className: VectorGroupByOperator - vectorOutput: true - keyExpressions: col 0, col 1, col 2, col 3 - native: false - projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 
9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] keys: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 @@ -205,23 +170,16 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int) sort order: ++++ - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: Uniform Hash IS false, No DISTINCT columns IS false Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE value expressions: _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: bigint), _col9 (type: bigint) - Execution mode: vectorized, llap + Execution mode: llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported + vectorized: false Reducer 2 Execution mode: llap Reduce Vectorization: diff --git ql/src/test/results/clientpositive/llap/vector_empty_where.q.out ql/src/test/results/clientpositive/llap/vector_empty_where.q.out index 9f93f86..ed9d551 100644 --- ql/src/test/results/clientpositive/llap/vector_empty_where.q.out +++ ql/src/test/results/clientpositive/llap/vector_empty_where.q.out @@ -62,7 +62,7 @@ STAGE PLANS: Reduce Sink Vectorization: className: VectorReduceSinkLongOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 2888 Data size: 8628 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -111,7 +111,7 @@ STAGE PLANS: Reduce Sink Vectorization: className: VectorReduceSinkOperator native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true 
nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) @@ -218,7 +218,7 @@ STAGE PLANS: Reduce Sink Vectorization: className: VectorReduceSinkLongOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 2888 Data size: 8628 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -267,7 +267,7 @@ STAGE PLANS: Reduce Sink Vectorization: className: VectorReduceSinkOperator native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) @@ -382,7 +382,7 @@ STAGE PLANS: Reduce Sink Vectorization: className: VectorReduceSinkLongOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 2888 Data size: 8628 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -431,7 +431,7 @@ STAGE PLANS: Reduce Sink Vectorization: className: VectorReduceSinkOperator native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic 
stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) @@ -546,7 +546,7 @@ STAGE PLANS: Reduce Sink Vectorization: className: VectorReduceSinkLongOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 2888 Data size: 8628 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -595,7 +595,7 @@ STAGE PLANS: Reduce Sink Vectorization: className: VectorReduceSinkOperator native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out new file mode 100644 index 0000000..2fa1efe --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out @@ -0,0 +1,773 @@ +PREHOOK: query: CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1 +PREHOOK: query: EXPLAIN +SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE 
Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT key, val, count(1) FROM T1 GROUP BY CUBE(key, val) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT key, val, count(1) FROM T1 GROUP BY CUBE(key, val) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + 
outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +1 11 1 +1 NULL 1 +2 12 1 +2 NULL 1 +3 13 1 +3 NULL 1 +7 17 1 +7 NULL 1 +8 18 1 +8 28 1 +8 NULL 2 +NULL 11 1 +NULL 12 1 +NULL 13 1 +NULL 17 1 +NULL 18 1 +NULL 28 1 +NULL NULL 6 +PREHOOK: query: EXPLAIN +SELECT key, val, GROUPING__ID, count(1) FROM T1 GROUP BY key, val with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT key, val, GROUPING__ID, count(1) FROM T1 GROUP BY key, val with cube +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key, val, 
GROUPING__ID, count(1) FROM T1 GROUP BY key, val with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, val, GROUPING__ID, count(1) FROM T1 GROUP BY key, val with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +1 11 0 1 +1 NULL 1 1 +2 12 0 1 +2 NULL 1 1 +3 13 0 1 +3 NULL 1 1 +7 17 0 1 +7 NULL 1 1 +8 18 0 1 +8 28 0 1 +8 NULL 1 2 +NULL 11 2 1 +NULL 12 2 1 +NULL 13 2 1 +NULL 17 2 1 +NULL 18 2 1 +NULL 28 2 1 +NULL NULL 3 6 +PREHOOK: query: EXPLAIN +SELECT key, count(distinct val) FROM T1 GROUP BY key with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT key, count(distinct val) FROM T1 GROUP BY key with cube +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: key, val + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT val) + keys: key (type: string), 0 (type: int), val (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col2:0._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col2 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key, count(distinct val) FROM T1 GROUP BY key with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, count(distinct val) FROM T1 GROUP BY key with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +1 1 +2 1 +3 1 +7 1 +8 2 +NULL 6 +PREHOOK: query: EXPLAIN +SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE 
PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +1 11 1 +1 NULL 1 +2 12 1 +2 NULL 1 +3 13 1 +3 NULL 1 +7 17 1 +7 NULL 1 +8 18 1 +8 28 1 +8 NULL 2 +NULL 11 1 +NULL 12 1 +NULL 13 1 +NULL 17 1 +NULL 18 1 +NULL 28 1 +NULL NULL 6 +PREHOOK: query: EXPLAIN +SELECT key, count(distinct val) FROM T1 GROUP BY key with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT key, count(distinct val) FROM T1 GROUP BY key with cube +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root 
stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: key, val + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT val) + keys: key (type: string), 0 (type: int), val (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col2:0._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: final + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col2 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key, count(distinct val) FROM T1 GROUP BY key with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, count(distinct val) FROM T1 GROUP BY key with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +1 1 +2 1 +3 1 +7 1 +8 2 +NULL 6 +PREHOOK: query: CREATE TABLE T2(key1 STRING, key2 STRING, val INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T2 +POSTHOOK: query: CREATE TABLE T2(key1 STRING, key2 STRING, val INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T2 +PREHOOK: query: CREATE TABLE T3(key1 STRING, key2 STRING, val INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: 
Output: default@T3 +POSTHOOK: query: CREATE TABLE T3(key1 STRING, key2 STRING, val INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T3 +PREHOOK: query: EXPLAIN +FROM T1 +INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with cube +INSERT OVERWRITE TABLE T3 SELECT key, val, sum(1) group by key, val with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM T1 +INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with cube +INSERT OVERWRITE TABLE T3 SELECT key, val, sum(1) group by key, val with cube +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: key, val + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: key (type: string), val (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: key, val + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(1) + keys: key (type: string), val (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + 
aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), UDFToInteger(_col3) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t2 + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), UDFToInteger(_col3) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t3 + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t2 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t3 + + Stage: Stage-5 + Stats-Aggr Operator + +PREHOOK: query: FROM T1 +INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with cube +INSERT OVERWRITE TABLE T3 SELECT key, val, sum(1) group by key, val with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Output: default@t2 +PREHOOK: Output: default@t3 +POSTHOOK: query: FROM T1 +INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with cube +INSERT 
OVERWRITE TABLE T3 SELECT key, val, sum(1) group by key, val with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Output: default@t2 +POSTHOOK: Output: default@t3 +POSTHOOK: Lineage: t2.key1 SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: t2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] +POSTHOOK: Lineage: t2.val EXPRESSION [(t1)t1.null, ] +POSTHOOK: Lineage: t3.key1 SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: t3.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] +POSTHOOK: Lineage: t3.val EXPRESSION [(t1)t1.null, ] diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out new file mode 100644 index 0000000..81b921c --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out @@ -0,0 +1,179 @@ +PREHOOK: query: CREATE TABLE T1_text(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1_text +POSTHOOK: query: CREATE TABLE T1_text(key STRING, val STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_text +PREHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@t1_text +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@t1_text +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +POSTHOOK: Lineage: t1.key SIMPLE [(t1_text)t1_text.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: t1.val SIMPLE [(t1_text)t1_text.FieldSchema(name:val, type:string, comment:null), ] +t1_text.key t1_text.val +PREHOOK: query: SELECT key, val, GROUPING__ID from T1 group by key, val with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, val, GROUPING__ID from T1 group by key, val with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key val grouping__id +1 11 0 +1 NULL 1 +2 12 0 +2 NULL 1 +3 13 0 +3 NULL 1 +7 17 0 +7 NULL 1 +8 18 0 +8 28 0 +8 NULL 1 +NULL 11 2 +NULL 12 2 +NULL 13 2 +NULL 17 2 +NULL 18 2 +NULL 28 2 +NULL NULL 3 +PREHOOK: query: SELECT key, val, GROUPING__ID from T1 group by cube(key, val) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, val, GROUPING__ID from T1 group by cube(key, val) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key val grouping__id +1 11 0 +1 NULL 1 +2 12 0 +2 NULL 1 +3 13 0 +3 NULL 1 +7 17 0 +7 NULL 1 +8 18 0 +8 28 0 +8 NULL 1 +NULL 11 2 +NULL 12 2 +NULL 13 2 +NULL 17 2 +NULL 18 2 +NULL 28 2 +NULL NULL 3 +PREHOOK: query: SELECT GROUPING__ID, key, val from T1 group by key, val with rollup +PREHOOK: type: QUERY +PREHOOK: Input: 
default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT GROUPING__ID, key, val from T1 group by key, val with rollup +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +grouping__id key val +0 1 11 +0 2 12 +0 3 13 +0 7 17 +0 8 18 +0 8 28 +1 1 NULL +1 2 NULL +1 3 NULL +1 7 NULL +1 8 NULL +3 NULL NULL +PREHOOK: query: SELECT GROUPING__ID, key, val from T1 group by rollup (key, val) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT GROUPING__ID, key, val from T1 group by rollup (key, val) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +grouping__id key val +0 1 11 +0 2 12 +0 3 13 +0 7 17 +0 8 18 +0 8 28 +1 1 NULL +1 2 NULL +1 3 NULL +1 7 NULL +1 8 NULL +3 NULL NULL +PREHOOK: query: SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN "0" WHEN GROUPING__ID == 1 THEN "1" WHEN GROUPING__ID == 2 THEN "2" WHEN GROUPING__ID == 3 THEN "3" ELSE "nothing" END from T1 group by key, val with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN "0" WHEN GROUPING__ID == 1 THEN "1" WHEN GROUPING__ID == 2 THEN "2" WHEN GROUPING__ID == 3 THEN "3" ELSE "nothing" END from T1 group by key, val with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key val grouping__id _c3 +1 11 0 0 +1 NULL 1 1 +2 12 0 0 +2 NULL 1 1 +3 13 0 0 +3 NULL 1 1 +7 17 0 0 +7 NULL 1 1 +8 18 0 0 +8 28 0 0 +8 NULL 1 1 +NULL 11 2 2 +NULL 12 2 2 +NULL 13 2 2 +NULL 17 2 2 +NULL 18 2 2 +NULL 28 2 2 +NULL NULL 3 3 +PREHOOK: query: SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN "0" WHEN GROUPING__ID == 1 THEN "1" WHEN GROUPING__ID == 2 THEN "2" WHEN GROUPING__ID == 3 THEN "3" ELSE "nothing" END from T1 group by cube(key, val) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN "0" WHEN GROUPING__ID == 1 THEN "1" WHEN GROUPING__ID == 2 THEN "2" WHEN GROUPING__ID == 3 THEN "3" ELSE "nothing" END from T1 group by cube(key, val) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key val grouping__id _c3 +1 11 0 0 +1 NULL 1 1 +2 12 0 0 +2 NULL 1 1 +3 13 0 0 +3 NULL 1 1 +7 17 0 0 +7 NULL 1 1 +8 18 0 0 +8 28 0 0 +8 NULL 1 1 +NULL 11 2 2 +NULL 12 2 2 +NULL 13 2 2 +NULL 17 2 2 +NULL 18 2 2 +NULL 28 2 2 +NULL NULL 3 3 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id2.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id2.q.out new file mode 100644 index 0000000..5edd2a6 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id2.q.out @@ -0,0 +1,359 @@ +PREHOOK: query: CREATE TABLE T1_text(key INT, value INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1_text +POSTHOOK: query: CREATE TABLE T1_text(key INT, value INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE 
T1_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_text +PREHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@t1_text +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@t1_text +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +POSTHOOK: Lineage: t1.key SIMPLE [(t1_text)t1_text.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: t1.value SIMPLE [(t1_text)t1_text.FieldSchema(name:value, type:int, comment:null), ] +t1_text.key t1_text.value +PREHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value grouping__id _c3 +1 1 0 1 +1 NULL 0 1 +1 NULL 1 2 +2 2 0 1 +2 NULL 1 1 +3 3 0 1 +3 NULL 0 1 +3 NULL 1 2 +4 5 0 1 +4 NULL 1 1 +NULL NULL 3 6 +PREHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY ROLLUP (key, value) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY ROLLUP (key, value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value grouping__id _c3 +1 1 0 1 +1 NULL 0 1 +1 NULL 1 2 +2 2 0 1 +2 NULL 1 1 +3 3 0 1 +3 NULL 0 1 +3 NULL 1 2 +4 5 0 1 +4 NULL 1 1 +NULL NULL 3 6 +PREHOOK: query: SELECT GROUPING__ID, count(*) +FROM +( +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +) t +GROUP BY GROUPING__ID +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT GROUPING__ID, count(*) +FROM +( +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +) t +GROUP BY GROUPING__ID +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +grouping__id _c1 +0 6 +1 4 +3 1 +PREHOOK: query: SELECT GROUPING__ID, count(*) +FROM +( +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY ROLLUP(key, value) +) t +GROUP BY GROUPING__ID +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT GROUPING__ID, count(*) +FROM +( +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY ROLLUP(key, value) +) t +GROUP BY GROUPING__ID +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +grouping__id _c1 +0 6 +1 4 +3 1 +PREHOOK: query: SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY key,value WITH ROLLUP) t1 +JOIN +(SELECT GROUPING__ID FROM T1 GROUP BY key, value WITH ROLLUP) t2 +ON t1.GROUPING__ID = t2.GROUPING__ID +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY key,value WITH ROLLUP) t1 +JOIN +(SELECT GROUPING__ID FROM T1 GROUP BY key, value WITH ROLLUP) t2 +ON t1.GROUPING__ID = t2.GROUPING__ID +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +t1.grouping__id 
t2.grouping__id +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +3 3 +PREHOOK: query: SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY ROLLUP(key,value)) t1 +JOIN +(SELECT GROUPING__ID FROM T1 GROUP BY ROLLUP(key, value)) t2 +ON t1.GROUPING__ID = t2.GROUPING__ID +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY ROLLUP(key,value)) t1 +JOIN +(SELECT GROUPING__ID FROM T1 GROUP BY ROLLUP(key, value)) t2 +ON t1.GROUPING__ID = t2.GROUPING__ID +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +t1.grouping__id t2.grouping__id +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +3 3 +PREHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value grouping__id _c3 +1 1 0 1 +1 NULL 0 1 +1 NULL 1 2 +2 2 0 1 +2 NULL 1 1 +3 3 0 1 +3 NULL 0 1 +3 NULL 1 2 +4 5 0 1 +4 NULL 1 1 +NULL NULL 3 6 +PREHOOK: query: SELECT GROUPING__ID, count(*) +FROM +( +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +) t +GROUP BY GROUPING__ID +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT GROUPING__ID, count(*) +FROM +( +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +) t +GROUP BY GROUPING__ID +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +grouping__id _c1 +0 6 +1 4 +3 1 +PREHOOK: query: SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY key,value WITH ROLLUP) t1 +JOIN +(SELECT GROUPING__ID FROM T1 GROUP BY key, value WITH ROLLUP) t2 +ON t1.GROUPING__ID = t2.GROUPING__ID +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY key,value WITH ROLLUP) t1 +JOIN +(SELECT GROUPING__ID FROM T1 GROUP BY key, value WITH ROLLUP) t2 +ON t1.GROUPING__ID = t2.GROUPING__ID +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +t1.grouping__id t2.grouping__id +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +3 3 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out new file mode 100644 index 0000000..8541d12 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out @@ -0,0 +1,232 @@ +PREHOOK: 
query: CREATE TABLE T1_text(key INT, value INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1_text +POSTHOOK: query: CREATE TABLE T1_text(key INT, value INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_text +PREHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@t1_text +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@t1_text +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +POSTHOOK: Lineage: t1.key SIMPLE [(t1_text)t1_text.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: t1.value SIMPLE [(t1_text)t1_text.FieldSchema(name:value, type:int, comment:null), ] +t1_text.key t1_text.value +PREHOOK: query: EXPLAIN +SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: key, value + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: key (type: int), value (type: int), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col2 = 1) (type: boolean) + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), 1 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), 1 (type: int) + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int), 1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 3 Data size: 20 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: int), _col1 (type: int), 1 (type: int), _col3 (type: bigint) + 
outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 20 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 20 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value grouping__id _c3 +1 NULL 1 2 +2 NULL 1 1 +3 NULL 1 2 +4 NULL 1 1 +PREHOOK: query: EXPLAIN +SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: int), _col1 (type: int), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col2 = 1) (type: boolean) + Statistics: Num rows: 3 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), 1 (type: int), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 20 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 20 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value grouping__id _c3 +1 NULL 1 2 +2 NULL 1 1 +3 NULL 1 2 +4 NULL 1 1 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets1.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets1.q.out new file mode 100644 index 0000000..5d34347 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets1.q.out @@ -0,0 +1,668 @@ +PREHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1_text +POSTHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_text +PREHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@t1_text +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@t1_text +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +POSTHOOK: Lineage: t1.a SIMPLE [(t1_text)t1_text.FieldSchema(name:a, type:string, comment:null), ] +POSTHOOK: Lineage: t1.b SIMPLE [(t1_text)t1_text.FieldSchema(name:b, type:string, comment:null), ] +POSTHOOK: Lineage: t1.c SIMPLE [(t1_text)t1_text.FieldSchema(name:c, type:string, comment:null), ] +t1_text.a t1_text.b t1_text.c +PREHOOK: query: SELECT * FROM T1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM T1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +t1.a t1.b t1.c +1 1 3 +2 2 4 +2 3 5 +3 2 8 +5 2 2 +8 1 1 +PREHOOK: query: EXPLAIN +SELECT a, b, count(*) from T1 group by a, b with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, count(*) from T1 group by a, b with cube +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num 
rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string) + outputColumnNames: a, b + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b, count(*) from T1 group by a, b with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, count(*) from T1 group by a, b with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 +1 1 1 +1 NULL 1 +2 2 1 +2 3 1 +2 NULL 2 +3 2 1 +3 NULL 1 +5 2 1 +5 NULL 1 +8 1 1 +8 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 +PREHOOK: query: EXPLAIN +SELECT a, b, count(*) from T1 group by cube(a, b) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, count(*) from T1 group by cube(a, b) +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string) + outputColumnNames: a, b + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 
(type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b, count(*) from T1 group by cube(a, b) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, count(*) from T1 group by cube(a, b) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 +1 1 1 +1 NULL 1 +2 2 1 +2 3 1 +2 NULL 2 +3 2 1 +3 NULL 1 +5 2 1 +5 NULL 1 +8 1 1 +8 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 +PREHOOK: query: EXPLAIN +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()) +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string) + outputColumnNames: a, b + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + 
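-- The constant key "0 (type: int)" in the Group By keys above is the grouping-set id:
-- CUBE(a, b) produces one aggregation row per input row per grouping set, which is why
-- the map-side estimate jumps from 6 to 24 rows (4 sets). A minimal sketch of the
-- equivalence, using the same T1(a, b, c) as this test:
SELECT a, b, count(*) FROM T1 GROUP BY a, b WITH CUBE;
-- behaves like the explicit form:
SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS ((a, b), (a), (b), ());
-- and, ignoring GROUPING__ID, like:
SELECT a, b, count(*) FROM T1 GROUP BY a, b
UNION ALL SELECT a, CAST(NULL AS STRING), count(*) FROM T1 GROUP BY a
UNION ALL SELECT CAST(NULL AS STRING), b, count(*) FROM T1 GROUP BY b
UNION ALL SELECT CAST(NULL AS STRING), CAST(NULL AS STRING), count(*) FROM T1;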
pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 +1 1 1 +1 NULL 1 +2 2 1 +2 3 1 +2 NULL 2 +3 2 1 +3 NULL 1 +5 2 1 +5 NULL 1 +8 1 1 +8 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 +PREHOOK: query: EXPLAIN +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)) +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string) + outputColumnNames: a, b + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 +1 1 1 +1 NULL 1 +2 2 1 +2 3 1 +2 NULL 2 +3 2 1 +3 NULL 1 +5 2 1 +5 NULL 1 +8 1 1 +8 NULL 1 +PREHOOK: query: EXPLAIN +SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string), c (type: string) + outputColumnNames: a, b, c + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: a (type: string), b (type: string), c (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 18 Data size: 4590 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int) + sort order: ++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int) + Statistics: Num rows: 18 Data size: 4590 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 9 Data size: 2295 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 9 Data size: 2295 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 9 Data size: 2295 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a +1 +2 +3 +5 +8 +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +PREHOOK: query: EXPLAIN +SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a FROM T1 GROUP BY a GROUPING SETS 
((a), (a)) +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string) + outputColumnNames: a + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: a (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a +1 +2 +3 +5 +8 +PREHOOK: query: EXPLAIN +SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b) +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (UDFToDouble(a) + UDFToDouble(b)) (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: double), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: double), _col1 (type: int) + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 
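-- The two plans around this point cover the degenerate grouping-set shapes: duplicate
-- sets are collapsed, so GROUPING SETS ((a), (a)) aggregates like a plain GROUP BY a,
-- and a single set such as GROUPING SETS (a+b) needs no row expansion; in both cases
-- the map-side Group By keeps the 6-row input estimate instead of multiplying it.
-- A sketch of the first equivalence on this test's T1:
SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a));
-- returns the same rows as:
SELECT a FROM T1 GROUP BY a;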
+ Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: double), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col2 + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: double), _col2 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +_c0 _c1 +2.0 1 +4.0 1 +5.0 2 +7.0 1 +9.0 1 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out new file mode 100644 index 0000000..4deef94 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out @@ -0,0 +1,469 @@ +PREHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1_text +POSTHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_text +PREHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@t1_text +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@t1_text +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +POSTHOOK: Lineage: t1.a SIMPLE [(t1_text)t1_text.FieldSchema(name:a, type:string, comment:null), ] +POSTHOOK: Lineage: t1.b SIMPLE [(t1_text)t1_text.FieldSchema(name:b, type:string, comment:null), ] +POSTHOOK: Lineage: t1.c SIMPLE [(t1_text)t1_text.FieldSchema(name:c, type:string, comment:null), ] +t1_text.a t1_text.b t1_text.c +PREHOOK: query: EXPLAIN +SELECT a, b, count(*) from T1 group by a, b with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, count(*) from T1 group by a, b with cube +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + 
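-- Unlike the single-shuffle plans in grouping_sets1, the plans in this file expand the
-- grouping sets on the reduce side: the map-side Group By keys are just a, b; Reducer 2
-- (mode: partials) appends the constant set-id key; Reducer 3 (mode: final) finishes.
-- Hive falls back to this extra stage when map-side expansion would multiply rows past
-- a configured set-count threshold; presumably this test lowers it, along the lines of:
set hive.new.job.grouping.set.cardinality=2;
-- CUBE(a, b) has 4 grouping sets > 2, so the expansion is deferred to a new reduce stage:
SELECT a, b, count(*) FROM T1 GROUP BY a, b WITH CUBE;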
Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string) + outputColumnNames: a, b + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: a (type: string), b (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT a, b, count(*) from T1 group by cube(a, b) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, count(*) from T1 group by cube(a, b) +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string) + outputColumnNames: a, b + Statistics: Num rows: 6 Data 
size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: a (type: string), b (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b, count(*) from T1 group by a, b with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, count(*) from T1 group by a, b with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 +1 1 1 +1 NULL 1 +2 2 1 +2 3 1 +2 NULL 2 +3 2 1 +3 NULL 1 +5 2 1 +5 NULL 1 +8 1 1 +8 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 +PREHOOK: query: EXPLAIN +SELECT a, b, sum(c) from T1 group by a, b with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, sum(c) from T1 group by a, b with cube +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string), c (type: string) + outputColumnNames: a, 
b, c + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(c) + keys: a (type: string), b (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: double) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: double) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b, sum(c) from T1 group by a, b with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, sum(c) from T1 group by a, b with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 +1 1 3.0 +1 NULL 3.0 +2 2 4.0 +2 3 5.0 +2 NULL 9.0 +3 2 8.0 +3 NULL 8.0 +5 2 2.0 +5 NULL 2.0 +8 1 1.0 +8 NULL 1.0 +NULL 1 4.0 +NULL 2 14.0 +NULL 3 5.0 +NULL NULL 23.0 +PREHOOK: query: CREATE TABLE T2(a STRING, b STRING, c int, d int) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T2 +POSTHOOK: query: CREATE TABLE T2(a STRING, b STRING, c int, d int) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T2 +PREHOOK: query: INSERT OVERWRITE TABLE T2 +SELECT a, b, c, c from T1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Output: default@t2 +POSTHOOK: query: INSERT OVERWRITE TABLE T2 +SELECT a, b, c, c from T1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Output: default@t2 +POSTHOOK: Lineage: t2.a SIMPLE 
[(t1)t1.FieldSchema(name:a, type:string, comment:null), ] +POSTHOOK: Lineage: t2.b SIMPLE [(t1)t1.FieldSchema(name:b, type:string, comment:null), ] +POSTHOOK: Lineage: t2.c EXPRESSION [(t1)t1.FieldSchema(name:c, type:string, comment:null), ] +POSTHOOK: Lineage: t2.d EXPRESSION [(t1)t1.FieldSchema(name:c, type:string, comment:null), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: EXPLAIN +SELECT a, b, sum(c+d) from T2 group by a, b with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, sum(c+d) from T2 group by a, b with cube +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t2 + Statistics: Num rows: 6 Data size: 1068 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string), (c + d) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1068 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col2) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1068 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 6 Data size: 1068 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 12 Data size: 2136 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 2136 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 2136 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b, sum(c+d) 
from T2 group by a, b with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, sum(c+d) from T2 group by a, b with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +a b _c2 +1 1 6 +1 NULL 6 +2 2 8 +2 3 10 +2 NULL 18 +3 2 16 +3 NULL 16 +5 2 4 +5 NULL 4 +8 1 2 +8 NULL 2 +NULL 1 8 +NULL 2 28 +NULL 3 10 +NULL NULL 46 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3.q.out new file mode 100644 index 0000000..b30aabd --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3.q.out @@ -0,0 +1,314 @@ +PREHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1_text +POSTHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets1.txt' INTO TABLE T1_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets1.txt' INTO TABLE T1_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets2.txt' INTO TABLE T1_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets2.txt' INTO TABLE T1_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_text +PREHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@t1_text +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@t1_text +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +POSTHOOK: Lineage: t1.a SIMPLE [(t1_text)t1_text.FieldSchema(name:a, type:string, comment:null), ] +POSTHOOK: Lineage: t1.b SIMPLE [(t1_text)t1_text.FieldSchema(name:b, type:string, comment:null), ] +POSTHOOK: Lineage: t1.c SIMPLE [(t1_text)t1_text.FieldSchema(name:c, type:string, comment:null), ] +t1_text.a t1_text.b t1_text.c +PREHOOK: query: EXPLAIN +SELECT a, b, avg(c), count(*) from T1 group by a, b with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, avg(c), count(*) from T1 group by a, b with cube +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string), c (type: string) + outputColumnNames: a, b, c + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Group 
By Operator + aggregations: avg(c), count() + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 48 Data size: 12240 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 48 Data size: 12240 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: struct), _col4 (type: bigint) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3, _col4 + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: double), _col4 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT a, b, avg(c), count(*) from T1 group by cube(a, b) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, avg(c), count(*) from T1 group by cube(a, b) +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string), c (type: string) + outputColumnNames: a, b, c + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(c), count() + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 48 Data size: 12240 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 48 Data size: 12240 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: struct), _col4 (type: bigint) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3, _col4 + Statistics: Num rows: 24 Data size: 6120 Basic stats: 
COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: double), _col4 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b, avg(c), count(*) from T1 group by a, b with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, avg(c), count(*) from T1 group by a, b with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 _c3 +1 1 3.0 2 +1 2 2.0 1 +1 NULL 2.6666666666666665 3 +2 2 5.333333333333333 3 +2 3 5.0 2 +2 NULL 5.2 5 +3 2 8.0 1 +3 NULL 8.0 1 +5 1 2.0 1 +5 NULL 2.0 1 +8 1 1.0 2 +8 NULL 1.0 2 +NULL 1 2.0 5 +NULL 2 5.2 5 +NULL 3 5.0 2 +NULL NULL 3.8333333333333335 12 +PREHOOK: query: EXPLAIN +SELECT a, b, avg(c), count(*) from T1 group by a, b with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, avg(c), count(*) from T1 group by a, b with cube +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string), c (type: string) + outputColumnNames: a, b, c + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(c), count() + keys: a (type: string), b (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: bigint) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 48 Data size: 12240 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 48 Data size: 12240 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: struct), _col4 (type: bigint) + Reducer 3 + Execution mode: 
llap + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col3, _col4 + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: double), _col4 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b, avg(c), count(*) from T1 group by a, b with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, avg(c), count(*) from T1 group by a, b with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 _c3 +1 1 3.0 2 +1 2 2.0 1 +1 NULL 2.6666666666666665 3 +2 2 5.333333333333333 3 +2 3 5.0 2 +2 NULL 5.2 5 +3 2 8.0 1 +3 NULL 8.0 1 +5 1 2.0 1 +5 NULL 2.0 1 +8 1 1.0 2 +8 NULL 1.0 2 +NULL 1 2.0 5 +NULL 2 5.2 5 +NULL 3 5.0 2 +NULL NULL 3.8333333333333335 12 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets4.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets4.q.out new file mode 100644 index 0000000..0175c38 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets4.q.out @@ -0,0 +1,554 @@ +PREHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1_text +POSTHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_text +PREHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@t1_text +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@t1_text +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +POSTHOOK: Lineage: t1.a SIMPLE [(t1_text)t1_text.FieldSchema(name:a, type:string, comment:null), ] +POSTHOOK: Lineage: t1.b SIMPLE [(t1_text)t1_text.FieldSchema(name:b, type:string, comment:null), ] +POSTHOOK: Lineage: t1.c SIMPLE [(t1_text)t1_text.FieldSchema(name:c, type:string, comment:null), ] +t1_text.a t1_text.b t1_text.c +PREHOOK: query: 
EXPLAIN +SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 +on subq1.a = subq2.a +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 +on subq1.a = subq2.a +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(a) < 3.0) (type: boolean) + Statistics: Num rows: 2 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 2040 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 8 Data size: 2040 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(a) < 3.0) (type: boolean) + Statistics: Num rows: 2 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 2040 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 8 Data size: 2040 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition 
columns: _col0 (type: string) + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 1122 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 1122 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: bigint) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by cube(a, b) ) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by cube(a, b) ) subq2 +on subq1.a = subq2.a +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by cube(a, b) ) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by cube(a, b) ) subq2 +on subq1.a = subq2.a +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(a) < 3.0) (type: boolean) + Statistics: Num rows: 2 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 2040 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 8 
Data size: 2040 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(a) < 3.0) (type: boolean) + Statistics: Num rows: 2 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 2040 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 8 Data size: 2040 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 1122 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 1122 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + 
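-- In these self-join plans, each cube subquery is followed by a "_col0 is not null"
-- filter before the shuffle: rollup rows whose a is NULL can never satisfy the inner
-- join condition subq1.a = subq2.a, so the optimizer drops them early. That is why the
-- joined result below has NULLs only in the b columns, never in a. The test's own query:
SELECT * FROM
(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1
join
(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2
on subq1.a = subq2.a;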
key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: bigint) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 +on subq1.a = subq2.a +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 +on subq1.a = subq2.a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +subq1.a subq1.b subq1._c2 subq2.a subq2.b subq2._c2 +1 1 1 1 1 1 +1 1 1 1 NULL 1 +1 NULL 1 1 1 1 +1 NULL 1 1 NULL 1 +2 2 1 2 2 1 +2 2 1 2 3 1 +2 2 1 2 NULL 2 +2 3 1 2 2 1 +2 3 1 2 3 1 +2 3 1 2 NULL 2 +2 NULL 2 2 2 1 +2 NULL 2 2 3 1 +2 NULL 2 2 NULL 2 +PREHOOK: query: EXPLAIN +SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 +on subq1.a = subq2.a +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 +on subq1.a = subq2.a +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(a) < 3.0) (type: boolean) + Statistics: Num rows: 2 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: a (type: string), b (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 2 Data size: 510 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 5 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(a) < 3.0) (type: boolean) + Statistics: Num rows: 2 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: a (type: string), b (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 
(type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 2 Data size: 510 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 2040 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 8 Data size: 2040 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: bigint) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 1122 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 1122 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 2040 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 8 Data size: 2040 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 7 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: 
int) + mode: final + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: bigint) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 +on subq1.a = subq2.a +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 +on subq1.a = subq2.a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +subq1.a subq1.b subq1._c2 subq2.a subq2.b subq2._c2 +1 1 1 1 1 1 +1 1 1 1 NULL 1 +1 NULL 1 1 1 1 +1 NULL 1 1 NULL 1 +2 2 1 2 2 1 +2 2 1 2 3 1 +2 2 1 2 NULL 2 +2 3 1 2 2 1 +2 3 1 2 3 1 +2 3 1 2 NULL 2 +2 NULL 2 2 2 1 +2 NULL 2 2 3 1 +2 NULL 2 2 NULL 2 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out new file mode 100644 index 0000000..c9b666c --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out @@ -0,0 +1,371 @@ +PREHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1_text +POSTHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_text +PREHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@t1_text +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@t1_text +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +POSTHOOK: Lineage: t1.a SIMPLE [(t1_text)t1_text.FieldSchema(name:a, type:string, comment:null), ] +POSTHOOK: Lineage: t1.b SIMPLE [(t1_text)t1_text.FieldSchema(name:b, type:string, comment:null), ] +POSTHOOK: Lineage: t1.c 
SIMPLE [(t1_text)t1_text.FieldSchema(name:c, type:string, comment:null), ] +t1_text.a t1_text.b t1_text.c +PREHOOK: query: EXPLAIN +SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string) + outputColumnNames: a, b + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: a (type: string), b (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string), _col1 (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by 
cube(a, b) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by cube(a, b) +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string) + outputColumnNames: a, b + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: a (type: string), b (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string), _col1 (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube +POSTHOOK: 
type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 +1 1 1 +1 NULL 1 +2 2 1 +2 3 1 +2 NULL 2 +3 2 1 +3 NULL 1 +5 2 1 +5 NULL 1 +8 1 1 +8 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 +PREHOOK: query: EXPLAIN +SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string) + outputColumnNames: a, b + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: a (type: string), b (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 4 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + 
pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 +1 1 1 +1 NULL 1 +2 2 1 +2 3 1 +2 NULL 2 +3 2 1 +3 NULL 1 +5 2 1 +5 NULL 1 +8 1 1 +8 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets6.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets6.q.out new file mode 100644 index 0000000..4081ea9 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets6.q.out @@ -0,0 +1,192 @@ +PREHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1_text +POSTHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_text +PREHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@t1_text +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@t1_text +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +POSTHOOK: Lineage: t1.a SIMPLE [(t1_text)t1_text.FieldSchema(name:a, type:string, comment:null), ] +POSTHOOK: Lineage: t1.b SIMPLE [(t1_text)t1_text.FieldSchema(name:b, type:string, comment:null), ] +POSTHOOK: Lineage: t1.c SIMPLE [(t1_text)t1_text.FieldSchema(name:c, type:string, comment:null), ] +t1_text.a t1_text.b t1_text.c +PREHOOK: query: EXPLAIN +SELECT a, b FROM +(SELECT a, b from T1 group by a, b grouping sets ( (a,b),a )) res +WHERE res.a=5 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b FROM +(SELECT a, b from T1 group by a, b grouping sets ( (a,b),a )) res +WHERE res.a=5 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: 
Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(a) = 5.0) (type: boolean) + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b FROM +(SELECT a, b from T1 group by a, b grouping sets ( (a,b),a )) res +WHERE res.a=5 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b FROM +(SELECT a, b from T1 group by a, b grouping sets ( (a,b),a )) res +WHERE res.a=5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b +5 2 +5 NULL +PREHOOK: query: EXPLAIN +SELECT a, b FROM +(SELECT a, b from T1 group by a, b grouping sets ( (a,b),a )) res +WHERE res.a=5 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b FROM +(SELECT a, b from T1 group by a, b grouping sets ( (a,b),a )) res +WHERE res.a=5 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(a) = 5.0) (type: boolean) + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + 
Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b FROM +(SELECT a, b from T1 group by a, b grouping sets ( (a,b),a )) res +WHERE res.a=5 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b FROM +(SELECT a, b from T1 group by a, b grouping sets ( (a,b),a )) res +WHERE res.a=5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b +5 2 +5 NULL diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out new file mode 100644 index 0000000..8696207 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out @@ -0,0 +1,838 @@ +PREHOOK: query: CREATE TABLE T1_text(key INT, value INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1_text +POSTHOOK: query: CREATE TABLE T1_text(key INT, value INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_text +PREHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@t1_text +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@t1_text +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +POSTHOOK: Lineage: t1.key SIMPLE [(t1_text)t1_text.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: t1.value SIMPLE [(t1_text)t1_text.FieldSchema(name:value, type:int, comment:null), ] +t1_text.key t1_text.value +PREHOOK: query: explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value) +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map 
Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: int), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 18 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 18 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 9 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: tinyint), grouping(_col2, 0) (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 9 Data size: 60 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 9 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value grouping__id _c3 _c4 +1 1 0 0 0 +1 NULL 0 0 0 +1 NULL 1 0 1 +2 2 0 0 0 +2 NULL 1 0 1 +3 3 0 0 0 +3 NULL 0 0 0 +3 NULL 1 0 1 +4 5 0 0 0 +4 NULL 1 0 1 +NULL NULL 3 1 1 +PREHOOK: query: explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value) +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: int), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 24 Data size: 160 Basic stats: 
COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: tinyint), grouping(_col2, 0) (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value grouping__id _c3 _c4 +1 1 0 0 0 +1 NULL 0 0 0 +1 NULL 1 0 1 +2 2 0 0 0 +2 NULL 1 0 1 +3 3 0 0 0 +3 NULL 0 0 0 +3 NULL 1 0 1 +4 5 0 0 0 +4 NULL 1 0 1 +NULL 1 2 1 0 +NULL 2 2 1 0 +NULL 3 2 1 0 +NULL 5 2 1 0 +NULL NULL 2 1 0 +NULL NULL 3 1 1 +PREHOOK: query: explain +select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: int), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + 
keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToInteger(grouping(_col2, 1)) = 1) (type: boolean) + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value +NULL 1 +NULL 2 +NULL 3 +NULL 5 +NULL NULL +NULL NULL +PREHOOK: query: explain +select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: int), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((UDFToInteger(grouping(_col2, 1)) = 1) or (UDFToInteger(grouping(_col2, 0)) = 1)) (type: boolean) + Statistics: Num rows: 12 Data size: 80 
Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), (grouping(_col2, 1) + grouping(_col2, 0)) (type: tinyint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: tinyint), CASE WHEN ((_col2 = 1)) THEN (_col0) ELSE (null) END (type: int) + sort order: -+ + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey0 (type: tinyint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value x +1 NULL 1 +2 NULL 1 +3 NULL 1 +4 NULL 1 +NULL 1 1 +NULL 2 1 +NULL 3 1 +NULL 5 1 +NULL NULL 1 +NULL NULL 2 +PREHOOK: query: explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value) +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: key, value + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: int), value (type: int), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 18 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 18 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: 
KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 9 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: tinyint), grouping(_col2, 0) (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 9 Data size: 60 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 9 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value grouping__id _c3 _c4 +1 1 0 0 0 +1 NULL 0 0 0 +1 NULL 1 0 1 +2 2 0 0 0 +2 NULL 1 0 1 +3 3 0 0 0 +3 NULL 0 0 0 +3 NULL 1 0 1 +4 5 0 0 0 +4 NULL 1 0 1 +NULL NULL 3 1 1 +PREHOOK: query: explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value) +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: key, value + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: int), value (type: int), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: tinyint), grouping(_col2, 0) (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column 
stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value grouping__id _c3 _c4 +1 1 0 0 0 +1 NULL 0 0 0 +1 NULL 1 0 1 +2 2 0 0 0 +2 NULL 1 0 1 +3 3 0 0 0 +3 NULL 0 0 0 +3 NULL 1 0 1 +4 5 0 0 0 +4 NULL 1 0 1 +NULL 1 2 1 0 +NULL 2 2 1 0 +NULL 3 2 1 0 +NULL 5 2 1 0 +NULL NULL 2 1 0 +NULL NULL 3 1 1 +PREHOOK: query: explain +select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: key, value + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: int), value (type: int), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (grouping(_col2, 1) = 1) (type: boolean) + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was 
here #### +POSTHOOK: query: select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value +NULL 1 +NULL 2 +NULL 3 +NULL 5 +NULL NULL +NULL NULL +PREHOOK: query: explain +select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: key, value + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: int), value (type: int), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((grouping(_col2, 1) = 1) or (grouping(_col2, 0) = 1)) (type: boolean) + Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), (grouping(_col2, 1) + grouping(_col2, 0)) (type: tinyint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: tinyint), CASE WHEN ((_col2 = 1)) THEN (_col0) END (type: int) + sort order: -+ + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey0 (type: tinyint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value x +1 NULL 1 +2 NULL 1 +3 NULL 1 +4 NULL 1 +NULL 1 1 +NULL 2 1 +NULL 3 1 +NULL 5 1 +NULL NULL 1 +NULL NULL 2 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out new file mode 100644 index 0000000..91d9e88 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out @@ -0,0 +1,568 @@ +PREHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1_text +POSTHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_text +PREHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@t1_text +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@t1_text +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +POSTHOOK: Lineage: t1.a SIMPLE [(t1_text)t1_text.FieldSchema(name:a, type:string, comment:null), ] +POSTHOOK: Lineage: t1.b SIMPLE [(t1_text)t1_text.FieldSchema(name:b, type:string, comment:null), ] +POSTHOOK: Lineage: t1.c SIMPLE [(t1_text)t1_text.FieldSchema(name:c, type:string, comment:null), ] +t1_text.a t1_text.b t1_text.c +PREHOOK: query: EXPLAIN +SELECT a, b, count(*) from T1 group by a, b with cube LIMIT 10 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, count(*) from T1 group by a, b with cube LIMIT 10 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string) + outputColumnNames: a, b + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: 
NONE + Group By Operator + aggregations: count() + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col3 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2550 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 2550 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b, count(*) from T1 group by a, b with cube LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, count(*) from T1 group by a, b with cube LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 +1 1 1 +1 NULL 1 +2 2 1 +2 3 1 +2 NULL 2 +3 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 +PREHOOK: query: EXPLAIN +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()) LIMIT 10 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()) LIMIT 10 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string) + outputColumnNames: a, b + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 
(type: int) + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col3 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2550 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 2550 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()) LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()) LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 +1 1 1 +1 NULL 1 +2 2 1 +2 3 1 +2 NULL 2 +3 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 +PREHOOK: query: EXPLAIN +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)) LIMIT 10 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)) LIMIT 10 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string) + outputColumnNames: a, b + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col3 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: 
int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)) LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)) LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 +1 1 1 +1 NULL 1 +2 2 1 +2 3 1 +2 NULL 2 +3 2 1 +3 NULL 1 +5 2 1 +5 NULL 1 +8 NULL 1 +PREHOOK: query: EXPLAIN +SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) LIMIT 10 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) LIMIT 10 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string), c (type: string) + outputColumnNames: a, b, c + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: a (type: string), b (type: string), c (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 18 Data size: 4590 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int) + sort order: ++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int) + Statistics: Num rows: 18 Data size: 4590 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 9 Data size: 2295 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 9 Data size: 2295 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 9 Data size: 2295 Basic stats: COMPLETE Column stats: NONE + File Output 
Operator + compressed: false + Statistics: Num rows: 9 Data size: 2295 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a +1 +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +PREHOOK: query: EXPLAIN +SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)) LIMIT 10 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)) LIMIT 10 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string) + outputColumnNames: a + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: a (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Limit + Number of rows: 10 + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)) LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)) LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a +1 +2 +3 +5 +8 +PREHOOK: query: EXPLAIN +SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b) LIMIT 10 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b) LIMIT 10 +POSTHOOK: type: QUERY +Explain 
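[Reviewer note, not part of the generated q.out content.] The LIMIT tests above pin down two edge cases: a duplicate set list such as GROUPING SETS ((a), (a)) is deduplicated (each key surfaces exactly once), and the grouping-sets-grouping file earlier in this patch shows that HAVING grouping(key) = 1 is compiled against the internal grouping-id key, appearing in the plan as grouping(_col2, 1). The contract these golden files assert is that GROUPING SETS behaves like a UNION ALL of ordinary GROUP BYs with NULL filled in for the absent keys. A minimal HiveQL sketch of that equivalence against the T1(a, b, c) table created above — an illustrative rewrite, not how the vectorized operator actually evaluates it:

-- Expected to match, up to row order:
--   SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b));
SELECT a, CAST(NULL AS STRING) AS b, count(*) FROM T1 GROUP BY a
UNION ALL
SELECT a, b, count(*) FROM T1 GROUP BY a, b;

The plan that follows exercises the remaining case, a grouping set over an expression (a + b).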
+STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (UDFToDouble(a) + UDFToDouble(b)) (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: double), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: double), _col1 (type: int) + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: double), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col2 + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: double), _col2 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b) LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b) LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +_c0 _c1 +2.0 1 +4.0 1 +5.0 2 +7.0 1 +9.0 1 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out new file mode 100644 index 0000000..678db83 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out @@ -0,0 +1,157 @@ +PREHOOK: query: create table t(category int, live int, comments int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t +POSTHOOK: query: create table t(category int, live int, comments int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t +PREHOOK: query: insert into table t select key, 0, 2 from src tablesample(3 rows) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@t +POSTHOOK: query: insert into table t select key, 0, 2 
from src tablesample(3 rows) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.category EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: t.comments SIMPLE [] +POSTHOOK: Lineage: t.live SIMPLE [] +_col0 _col1 _col2 +PREHOOK: query: explain +select category, max(live) live, max(comments) comments, rank() OVER (PARTITION BY category ORDER BY comments) rank1 +FROM t +GROUP BY category +GROUPING SETS ((), (category)) +HAVING max(comments) > 0 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select category, max(live) live, max(comments) comments, rank() OVER (PARTITION BY category ORDER BY comments) rank1 +FROM t +GROUP BY category +GROUPING SETS ((), (category)) +HAVING max(comments) > 0 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: category (type: int), live (type: int), comments (type: int) + outputColumnNames: category, live, comments + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(live), max(comments) + keys: category (type: int), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0), max(VALUE._col1) + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Filter Operator + predicate: (_col3 > 0) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col3 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: int, _col2: int, _col3: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col3 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col3 + 
name: rank + window function: GenericUDAFRankEvaluator + window frame: PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), rank_window_0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select category, max(live) live, max(comments) comments, rank() OVER (PARTITION BY category ORDER BY comments) rank1 +FROM t +GROUP BY category +GROUPING SETS ((), (category)) +HAVING max(comments) > 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: select category, max(live) live, max(comments) comments, rank() OVER (PARTITION BY category ORDER BY comments) rank1 +FROM t +GROUP BY category +GROUPING SETS ((), (category)) +HAVING max(comments) > 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +category live comments rank1 +NULL 0 2 1 +86 0 2 1 +238 0 2 1 +311 0 2 1 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out new file mode 100644 index 0000000..e737f0b --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out @@ -0,0 +1,610 @@ +PREHOOK: query: CREATE TABLE T1_text(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1_text +POSTHOOK: query: CREATE TABLE T1_text(key STRING, val STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_text +PREHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@t1_text +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@t1_text +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +POSTHOOK: Lineage: t1.key SIMPLE [(t1_text)t1_text.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: t1.val SIMPLE [(t1_text)t1_text.FieldSchema(name:val, type:string, comment:null), ] +PREHOOK: query: EXPLAIN +SELECT key, val, count(1) FROM T1 GROUP BY key, val with rollup +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT key, val, count(1) FROM T1 GROUP BY key, val with rollup +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked 
pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1026 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1026 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 18 Data size: 3078 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 18 Data size: 3078 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 9 Data size: 1539 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 9 Data size: 1539 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 9 Data size: 1539 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key, val, count(1) FROM T1 GROUP BY key, val with rollup +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, val, count(1) FROM T1 GROUP BY key, val with rollup +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +1 11 1 +1 NULL 1 +2 12 1 +2 NULL 1 +3 13 1 +3 NULL 1 +7 17 1 +7 NULL 1 +8 18 1 +8 28 1 +8 NULL 2 +NULL NULL 6 +PREHOOK: query: EXPLAIN +SELECT key, count(distinct val) FROM T1 GROUP BY key with rollup +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT key, count(distinct val) FROM T1 GROUP BY key with rollup +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1026 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: key, val + Statistics: Num rows: 6 Data size: 1026 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT val) + keys: key (type: string), 0 (type: int), val (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + 
Statistics: Num rows: 12 Data size: 2052 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 12 Data size: 2052 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col2:0._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col2 + Statistics: Num rows: 6 Data size: 1026 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col2 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1026 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 1026 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key, count(distinct val) FROM T1 GROUP BY key with rollup +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, count(distinct val) FROM T1 GROUP BY key with rollup +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +1 0 +2 0 +3 0 +7 0 +8 0 +NULL 0 +PREHOOK: query: EXPLAIN +SELECT key, val, count(1) FROM T1 GROUP BY key, val with rollup +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT key, val, count(1) FROM T1 GROUP BY key, val with rollup +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1026 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1026 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 18 Data size: 3078 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 18 Data size: 3078 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 18 Data size: 3078 Basic stats: COMPLETE Column 
stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 18 Data size: 3078 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 9 Data size: 1539 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 9 Data size: 1539 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 9 Data size: 1539 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key, val, count(1) FROM T1 GROUP BY key, val with rollup +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, val, count(1) FROM T1 GROUP BY key, val with rollup +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +1 11 1 +1 NULL 1 +2 12 1 +2 NULL 1 +3 13 1 +3 NULL 1 +7 17 1 +7 NULL 1 +8 18 1 +8 28 1 +8 NULL 2 +NULL NULL 6 +PREHOOK: query: EXPLAIN +SELECT key, count(distinct val) FROM T1 GROUP BY key with rollup +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT key, count(distinct val) FROM T1 GROUP BY key with rollup +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1026 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: key, val + Statistics: Num rows: 6 Data size: 1026 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT val) + keys: key (type: string), 0 (type: int), val (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 12 Data size: 2052 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 2052 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col2:0._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 2052 Basic stats: COMPLETE Column stats: NONE + Reduce 
Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 2052 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: final + outputColumnNames: _col0, _col2 + Statistics: Num rows: 6 Data size: 1026 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col2 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1026 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 1026 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key, count(distinct val) FROM T1 GROUP BY key with rollup +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, count(distinct val) FROM T1 GROUP BY key with rollup +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +1 0 +2 0 +3 0 +7 0 +8 0 +NULL 0 +PREHOOK: query: CREATE TABLE T2(key1 STRING, key2 STRING, val INT) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T2 +POSTHOOK: query: CREATE TABLE T2(key1 STRING, key2 STRING, val INT) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T2 +PREHOOK: query: CREATE TABLE T3(key1 STRING, key2 STRING, val INT) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T3 +POSTHOOK: query: CREATE TABLE T3(key1 STRING, key2 STRING, val INT) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T3 +PREHOOK: query: EXPLAIN +FROM T1 +INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with rollup +INSERT OVERWRITE TABLE T3 SELECT key, val, sum(1) group by rollup(key, val) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM T1 +INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with rollup +INSERT OVERWRITE TABLE T3 SELECT key, val, sum(1) group by rollup(key, val) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1026 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: key, val + Statistics: Num rows: 6 Data size: 1026 Basic stats: COMPLETE Column stats: NONE + 
Group By Operator + aggregations: count(1) + keys: key (type: string), val (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 18 Data size: 3078 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 18 Data size: 3078 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: key, val + Statistics: Num rows: 6 Data size: 1026 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(1) + keys: key (type: string), val (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 18 Data size: 3078 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 18 Data size: 3078 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 18 Data size: 3078 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 18 Data size: 3078 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 9 Data size: 1539 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), UDFToInteger(_col3) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 9 Data size: 1539 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 9 Data size: 1539 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.t2 + Reducer 4 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 18 Data size: 3078 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 18 
Data size: 3078 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 5 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 9 Data size: 1539 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), UDFToInteger(_col3) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 9 Data size: 1539 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 9 Data size: 1539 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.t3 + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.t2 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.t3 + + Stage: Stage-5 + Stats-Aggr Operator + +PREHOOK: query: FROM T1 +INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with rollup +INSERT OVERWRITE TABLE T3 SELECT key, val, sum(1) group by key, val with rollup +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Output: default@t2 +PREHOOK: Output: default@t3 +POSTHOOK: query: FROM T1 +INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with rollup +INSERT OVERWRITE TABLE T3 SELECT key, val, sum(1) group by key, val with rollup +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Output: default@t2 +POSTHOOK: Output: default@t3 +POSTHOOK: Lineage: t2.key1 SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: t2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] +POSTHOOK: Lineage: t2.val EXPRESSION [(t1)t1.null, ] +POSTHOOK: Lineage: t3.key1 SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: t3.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] +POSTHOOK: Lineage: t3.val EXPRESSION [(t1)t1.null, ] diff --git ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out index b15b61c..bb99556 100644 --- ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out +++ ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out @@ -154,11 +154,24 @@ STAGE PLANS: TableScan alias: store Statistics: Num rows: 12 Data size: 25632 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28] Select Operator expressions: s_store_id (type: string) outputColumnNames: s_store_id + Select Vectorization: + className: 
VectorSelectOperator + native: true + projectedOutputColumns: [1] Statistics: Num rows: 12 Data size: 25632 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 1, ConstantVectorExpression(val 0) -> 29:long + native: false + projectedOutputColumns: [] keys: s_store_id (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1 @@ -167,24 +180,38 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 24 Data size: 51264 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: GROUPBY operator: Grouping sets not supported - vectorized: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Pruning grouping set id not supported - vectorized: false + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1 + native: false + projectedOutputColumns: [] keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0 @@ -192,6 +219,9 @@ STAGE PLANS: pruneGroupingSetId: true File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 12 Data size: 25632 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -254,11 +284,24 @@ STAGE PLANS: TableScan alias: store Statistics: Num rows: 12 Data size: 25632 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28] Select Operator expressions: s_store_id (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1] Statistics: Num rows: 12 Data size: 25632 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 1, ConstantVectorExpression(val 0) -> 29:long + native: false + projectedOutputColumns: [] keys: _col0 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1 @@ -267,15 +310,21 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: int) sort 
order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 24 Data size: 51264 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: GROUPBY operator: Grouping sets not supported - vectorized: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -382,7 +431,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 24 Data size: 51264 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_non_string_partition.q.out ql/src/test/results/clientpositive/llap/vector_non_string_partition.q.out index ae52b60..81dafa1 100644 --- ql/src/test/results/clientpositive/llap/vector_non_string_partition.q.out +++ ql/src/test/results/clientpositive/llap/vector_non_string_partition.q.out @@ -27,10 +27,14 @@ POSTHOOK: query: SHOW PARTITIONS non_string_part POSTHOOK: type: SHOWPARTITIONS POSTHOOK: Input: default@non_string_part ctinyint=__HIVE_DEFAULT_PARTITION__ -PREHOOK: query: EXPLAIN SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -38,6 +42,7 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### @@ -46,36 +51,79 @@ STAGE PLANS: Map Operator Tree: TableScan alias: non_string_part - Statistics: Num rows: 3073 Data size: 339150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3073 Data size: 351442 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 0) -> boolean predicate: (cint > 0) (type: boolean) - Statistics: Num rows: 1024 Data size: 113013 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1024 Data size: 4096 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: cint (type: 
int), ctinyint (type: tinyint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1024 Data size: 113013 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 4] + Statistics: Num rows: 1024 Data size: 4096 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 1024 Data size: 113013 Basic stats: COMPLETE Column stats: NONE + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + Statistics: Num rows: 1024 Data size: 4096 Basic stats: COMPLETE Column stats: PARTIAL + TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: tinyint) Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: vectorized, uber + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: tinyint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1024 Data size: 113013 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 1024 Data size: 4096 Basic stats: COMPLETE Column stats: PARTIAL Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 1100 Basic stats: COMPLETE Column stats: NONE + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 1100 Basic stats: COMPLETE Column stats: NONE + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: PARTIAL table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 @@ -104,10 +152,14 @@ POSTHOOK: Input: default@non_string_part@ctinyint=__HIVE_DEFAULT_PARTITION__ 799471 NULL 1248059 NULL 1286921 NULL -PREHOOK: query: EXPLAIN SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cint, cstring1 FROM non_string_part 
WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10 +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -115,6 +167,7 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### @@ -123,35 +176,78 @@ STAGE PLANS: Map Operator Tree: TableScan alias: non_string_part - Statistics: Num rows: 3073 Data size: 339150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3073 Data size: 363734 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0, val 0) -> boolean predicate: (cint > 0) (type: boolean) - Statistics: Num rows: 1024 Data size: 113013 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1024 Data size: 121205 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int), cstring1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1024 Data size: 113013 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 1024 Data size: 121205 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - Statistics: Num rows: 1024 Data size: 113013 Basic stats: COMPLETE Column stats: NONE + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + Statistics: Num rows: 1024 Data size: 121205 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: vectorized, uber + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1024 Data size: 113013 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 1024 Data size: 121205 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 - Statistics: Num rows: 10 
Data size: 1100 Basic stats: COMPLETE Column stats: NONE + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 10 Data size: 1180 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 1100 Basic stats: COMPLETE Column stats: NONE + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 10 Data size: 1180 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 diff --git ql/src/test/results/clientpositive/llap/vector_tablesample_rows.q.out ql/src/test/results/clientpositive/llap/vector_tablesample_rows.q.out index 56fb85c..aa63cb1 100644 --- ql/src/test/results/clientpositive/llap/vector_tablesample_rows.q.out +++ ql/src/test/results/clientpositive/llap/vector_tablesample_rows.q.out @@ -264,7 +264,7 @@ STAGE PLANS: Reduce Sink Vectorization: className: VectorReduceSinkOperator native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) diff --git ql/src/test/results/clientpositive/llap/vectorization_15.q.out ql/src/test/results/clientpositive/llap/vectorization_15.q.out index 02b221d..531d471 100644 --- ql/src/test/results/clientpositive/llap/vectorization_15.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_15.q.out @@ -239,15 +239,15 @@ POSTHOOK: Input: default@alltypesorc -51.0 true NULL QiOcvR0kt6r7f0R7fiPxQTCU -51 266531954 1969-12-31 16:00:08.451 0.0 -266531980.28 NULL NULL 33.0 0.0 0.0 NULL 51 0.0 2.66532E8 -23 266531980.28 0.0 -51.0 true NULL Ybpj38RTTYl7CnJXPNx1g4C -51 -370919370 1969-12-31 16:00:08.451 0.0 370919343.72 NULL NULL 33.0 0.0 0.0 NULL 51 0.0 -3.70919296E8 -23 -370919343.72 0.0 -6.0 NULL -200.0 NULL -6 NULL 1969-12-31 15:59:56.094 0.0 NULL -200.0 -15910.599999999999 3.0 0.0 0.0 -23.0 6 NULL NULL -5 NULL NULL --62.0 NULL 15601.0 NULL -62 NULL 1969-12-31 16:00:09.889 0.0 NULL 15601.0 1241106.353 33.0 0.0 0.0 -23.0 62 NULL NULL -23 NULL NULL +-62.0 NULL 15601.0 NULL -62 NULL 1969-12-31 15:59:56.527 0.0 NULL 15601.0 1241106.353 33.0 0.0 0.0 -23.0 62 NULL NULL -23 NULL NULL 11.0 false NULL 10pO8p1LNx4Y 11 271296824 1969-12-31 16:00:02.351 0.0 -271296850.28 NULL NULL 0.0 0.0 0.0 NULL -11 0.0 2.71296832E8 -1 271296850.28 0.0 11.0 false NULL 1H6wGP 11 -560827082 1969-12-31 16:00:02.351 0.0 560827055.72 NULL NULL 0.0 0.0 0.0 NULL -11 0.0 -5.6082707E8 -1 -560827055.72 0.0 11.0 false NULL 2a7V63IL7jK3o 11 -325931647 1969-12-31 16:00:02.351 0.0 325931620.72 NULL NULL 0.0 0.0 0.0 NULL -11 0.0 
-3.25931648E8 -1 -325931620.72 0.0 11.0 true NULL 10 11 92365813 1969-12-31 16:00:02.351 0.0 -92365839.28 NULL NULL 0.0 0.0 0.0 NULL -11 0.0 9.2365808E7 -1 92365839.28 0.0 -21.0 NULL 15601.0 NULL 21 NULL 1969-12-31 16:00:14.256 0.0 NULL 15601.0 1241106.353 12.0 0.0 0.0 -23.0 -21 NULL NULL -2 NULL NULL +21.0 NULL 15601.0 NULL 21 NULL 1969-12-31 15:59:56.527 0.0 NULL 15601.0 1241106.353 12.0 0.0 0.0 -23.0 -21 NULL NULL -2 NULL NULL 32.0 NULL -200.0 NULL 32 NULL 1969-12-31 16:00:02.445 0.0 NULL -200.0 -15910.599999999999 1.0 0.0 0.0 -23.0 -32 NULL NULL -23 NULL NULL 36.0 NULL -200.0 NULL 36 NULL 1969-12-31 16:00:00.554 0.0 NULL -200.0 -15910.599999999999 33.0 0.0 0.0 -23.0 -36 NULL NULL -23 NULL NULL -5.0 NULL 15601.0 NULL 5 NULL 1969-12-31 16:00:00.959 0.0 NULL 15601.0 1241106.353 3.0 0.0 0.0 -23.0 -5 NULL NULL -3 NULL NULL +5.0 NULL 15601.0 NULL 5 NULL 1969-12-31 15:59:56.527 0.0 NULL 15601.0 1241106.353 3.0 0.0 0.0 -23.0 -5 NULL NULL -3 NULL NULL 58.0 NULL 15601.0 NULL 58 NULL 1969-12-31 15:59:56.527 0.0 NULL 15601.0 1241106.353 33.0 0.0 0.0 -23.0 -58 NULL NULL -23 NULL NULL 8.0 false NULL 10V3pN5r5lI2qWl2lG103 8 -362835731 1969-12-31 16:00:15.892 0.0 362835704.72 NULL NULL 1.0 0.0 0.0 NULL -8 0.0 -3.62835744E8 -7 -362835704.72 0.0 8.0 false NULL 10c4qt584m5y6uWT 8 -183000142 1969-12-31 16:00:15.892 0.0 183000115.72 NULL NULL 1.0 0.0 0.0 NULL -8 0.0 -1.8300016E8 -7 -183000115.72 0.0 diff --git ql/src/test/results/clientpositive/llap/vectorization_div0.q.out ql/src/test/results/clientpositive/llap/vectorization_div0.q.out index 2b61979..37d05c8 100644 --- ql/src/test/results/clientpositive/llap/vectorization_div0.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_div0.q.out @@ -1,27 +1,71 @@ -PREHOOK: query: -- TODO: add more stuff here after HIVE-5918 is fixed, such as cbigint and constants -explain +PREHOOK: query: explain vectorization expression select cdouble / 0.0 from alltypesorc limit 100 PREHOOK: type: QUERY -POSTHOOK: query: -- TODO: add more stuff here after HIVE-5918 is fixed, such as cbigint and constants -explain +POSTHOOK: query: explain vectorization expression select cdouble / 0.0 from alltypesorc limit 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 73400 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Select Operator + expressions: (cdouble / 0.0) (type: double) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [12] + selectExpressions: DoubleColDivideDoubleScalar(col 5, val 0.0) -> 12:double + Statistics: Num rows: 12288 Data size: 98304 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 100 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 100 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 100 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Stage: Stage-0 Fetch Operator limit: 100 Processor Tree: - TableScan - alias: alltypesorc - Select Operator - expressions: (cdouble / 0.0) (type: double) - outputColumnNames: _col0 - Limit - Number of rows: 100 - ListSink + ListSink PREHOOK: query: select cdouble / 0.0 from alltypesorc limit 100 PREHOOK: type: QUERY @@ -131,20 +175,18 @@ NULL NULL NULL NULL -PREHOOK: query: -- There are no zeros in the table, but there is 988888, so use it as zero - --- TODO: add more stuff here after HIVE-5918 is fixed, such as cbigint and constants as numerators -explain +PREHOOK: query: explain vectorization expression select (cbigint - 988888L) as s1, cdouble / (cbigint - 988888L) as s2, 1.2 / (cbigint - 988888L) from alltypesorc where cbigint > 0 and cbigint < 100000000 order by s1, s2 limit 100 PREHOOK: type: QUERY -POSTHOOK: query: -- There are no zeros in the table, but there is 988888, so use it as zero - --- TODO: add more stuff here after HIVE-5918 is fixed, such as cbigint and constants as numerators -explain +POSTHOOK: query: explain vectorization expression select (cbigint - 988888L) as s1, cdouble / (cbigint - 988888L) as s2, 1.2 / (cbigint - 988888L) from alltypesorc where cbigint > 0 and cbigint < 100000000 order by s1, s2 limit 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -152,6 +194,7 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### @@ -160,36 +203,80 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 146792 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 3, val 0) -> boolean, FilterLongColLessLongScalar(col 3, val 100000000) -> boolean) -> boolean predicate: ((cbigint > 0) and (cbigint < 100000000)) (type: boolean) - Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 16320 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: (cbigint - 988888) (type: bigint), (cdouble / UDFToDouble((cbigint - 988888))) (type: double), (1.2 / UDFToDouble((cbigint - 988888))) (type: double) + expressions: (cbigint - 988888) (type: bigint), (cdouble / UDFToDouble((cbigint - 988888))) (type: double), (1.2 / CAST( (cbigint - 988888) AS decimal(19,0))) (type: decimal(22,21)) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE + 
Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [12, 15, 17] + selectExpressions: LongColSubtractLongScalar(col 3, val 988888) -> 12:long, DoubleColDivideDoubleColumn(col 5, col 14)(children: CastLongToDouble(col 13)(children: LongColSubtractLongScalar(col 3, val 988888) -> 13:long) -> 14:double) -> 15:double, DecimalScalarDivideDecimalColumn(val 1.2, col 16)(children: CastLongToDecimal(col 13)(children: LongColSubtractLongScalar(col 3, val 988888) -> 13:long) -> 16:decimal(19,0)) -> 17:decimal(22,21) + Statistics: Num rows: 1365 Data size: 174720 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint), _col1 (type: double) sort order: ++ - Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: double) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + Statistics: Num rows: 1365 Data size: 174720 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: decimal(22,21)) Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: vectorized, uber + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: bigint), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: double) + expressions: KEY.reducesinkkey0 (type: bigint), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: decimal(22,21)) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] + Statistics: Num rows: 1365 Data size: 174720 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 100 - Statistics: Num rows: 100 Data size: 21500 Basic stats: COMPLETE Column stats: NONE + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 100 Data size: 12800 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 100 Data size: 21500 Basic stats: COMPLETE Column stats: NONE + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 100 Data size: 12800 Basic stats: COMPLETE Column stats: COMPLETE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 @@ -208,9 +295,9 @@ from alltypesorc where cbigint > 0 and cbigint < 100000000 order by s1, s2 limit POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### --985319 NULL -1.21787969175465E-6 --985319 2.0297994862577501E-4 -1.21787969175465E-6 --63925 0.11256941728588189 -1.8771998435666796E-5 +-985319 NULL -0.000001217879691754650 +-985319 2.0297994862577501E-4 -0.000001217879691754650 +-63925 0.11256941728588189 -0.000018771998435666797 0 NULL NULL 0 NULL NULL 0 NULL NULL @@ -220,106 +307,106 @@ POSTHOOK: Input: default@alltypesorc 0 NULL NULL 0 NULL NULL 0 NULL NULL -392309 NULL 3.05881333336732E-6 -673083 -0.010691103474608629 1.7828410463494101E-6 -2331159 NULL 5.147654021025593E-7 -2342037 NULL 5.123744842630582E-7 -3533105 -5.660743170667161E-5 3.3964459024002967E-7 -3768727 0.004139594085748318 3.184099033970887E-7 -4728619 NULL 2.5377388197272816E-7 -5391403 NULL 2.2257657236901044E-7 -7022666 -0.0010246820794268159 1.708752772807364E-7 -7470430 NULL 1.6063332365071354E-7 -8276429 NULL 1.4499006757624573E-7 -8286860 -8.683626850218298E-4 1.44807562816314E-7 -8299981 -8.669899364829872E-4 1.445786442161735E-7 -9247593 NULL 1.297634962957388E-7 -9821695 -7.326637611939691E-4 1.2217850381222386E-7 -10000738 0.001559984873116364 1.1999114465352456E-7 -10081828 0.0015474376273826532 1.190260337708598E-7 -10745355 -6.696847149303117E-4 1.1167616146697805E-7 -11127199 -1.797397530142132E-5 1.0784385180852791E-7 -11722580 NULL 1.023665438836843E-7 -12649396 NULL 9.486618965838368E-8 -13126214 -1.5236685917203544E-5 9.142011550322126E-8 -14042667 NULL 8.545385288991044E-8 -14943972 -1.3383322720358416E-5 8.02999363221505E-8 -16259022 NULL 7.380517721176587E-8 -16531556 -1.2098074736582569E-5 7.258844841949542E-8 -16596157 NULL 7.230589587697923E-8 -17058489 -1.1724367849930905E-5 7.034620709958544E-8 -17247320 -4.172242412154468E-4 6.957602688417679E-8 -19004427 8.209139901981786E-4 6.314318237534864E-8 -19498517 NULL 6.154314197331007E-8 -20165679 7.736411950224934E-4 5.95070466013071E-8 -20547875 NULL 5.840019953401507E-8 -23264783 NULL 5.158010715165492E-8 -23475527 6.645644206411213E-4 5.111706331448917E-8 -24379905 NULL 4.922086447834805E-8 -24514624 -2.935390728407664E-4 4.895037345871591E-8 -25154198 -2.860755091456305E-4 4.770575472133916E-8 -25245192 -7.922300610745999E-6 4.7533803664475993E-8 -26610943 NULL 4.509423059528556E-8 -27520143 5.668938566198584E-4 4.360442458456702E-8 -27818379 NULL 4.313694913711543E-8 -28400244 NULL 4.225315810666979E-8 -28698999 5.43607810153936E-4 4.18133050563889E-8 -28806400 -6.9429015774272385E-6 4.165740946456343E-8 -29920877 5.214085135271938E-4 4.010577631130264E-8 -33126539 NULL 3.622473207961749E-8 -34603086 NULL 3.467898787986713E-8 -35156265 NULL 3.413331876978399E-8 -35862260 NULL 3.346136021544654E-8 -36123797 -1.992038655294182E-4 3.321909931007529E-8 -36341671 -1.980096072082101E-4 3.301994561559924E-8 -36413215 -5.4925114412446145E-6 3.2955068647467685E-8 -36578596 4.2650625518814335E-4 3.280607052277239E-8 -36796441 -1.955623914823719E-4 3.2611849607955287E-8 -39723587 NULL 3.0208752296211316E-8 -39985709 -1.7996429674411925E-4 3.001072208073139E-8 -40018606 NULL 2.998605198791782E-8 -41003161 NULL 2.9266036342905367E-8 -41158231 3.790493328053871E-4 2.9155772025284565E-8 -41848817 NULL 2.8674645689506587E-8 -44047567 -1.633688416888043E-4 2.724327543448654E-8 
-45125678 NULL 2.6592398234991615E-8 -45180154 NULL 2.6560334433565674E-8 -45717793 3.4124569399052136E-4 2.6247986205283355E-8 -46163162 NULL 2.5994753132378583E-8 -46525838 3.353190543284787E-4 2.5792120068852925E-8 -48626663 NULL 2.4677819244968545E-8 -49102701 -1.465499830650864E-4 2.4438574163160596E-8 -50300445 -1.4306036457530346E-4 2.3856647789100076E-8 -50929325 -1.412938420055636E-4 2.356206370298448E-8 -52422534 -1.3726921327381848E-4 2.2890919389741823E-8 -52667422 2.9621727070673783E-4 2.2784483356713376E-8 -52962061 2.945693522010029E-4 2.265772852004381E-8 -53695172 NULL 2.234837798824818E-8 -54760317 NULL 2.1913678841559662E-8 -55020655 2.835480602693661E-4 2.180999117513232E-8 -56102034 NULL 2.1389598815615135E-8 -56131313 NULL 2.13784416551952E-8 -56838351 -3.5187509222426247E-6 2.1112505533455745E-8 -56997841 -3.5089048372902406E-6 2.105342902374144E-8 -57778807 -1.2454393528755274E-4 2.076886080392764E-8 -58080381 NULL 2.0661021490199935E-8 -58307527 NULL 2.058053328174937E-8 -58536385 -1.2293208745295768E-4 2.0500070170031853E-8 -59347745 NULL 2.0219807846111087E-8 -60229567 NULL 1.992376933408802E-8 -60330397 NULL 1.9890470801974003E-8 -PREHOOK: query: -- There are no zeros in the table, but there is -200.0, so use it as zero - -explain +392309 NULL 0.000003058813333367320 +673083 -0.010691103474608629 0.000001782841046349410 +2331159 NULL 0.000000514765402102559 +2342037 NULL 0.000000512374484263058 +3533105 -5.660743170667161E-5 0.000000339644590240030 +3768727 0.004139594085748318 0.000000318409903397089 +4728619 NULL 0.000000253773881972728 +5391403 NULL 0.000000222576572369010 +7022666 -0.0010246820794268159 0.000000170875277280736 +7470430 NULL 0.000000160633323650714 +8276429 NULL 0.000000144990067576246 +8286860 -8.683626850218298E-4 0.000000144807562816314 +8299981 -8.669899364829872E-4 0.000000144578644216174 +9247593 NULL 0.000000129763496295739 +9821695 -7.326637611939691E-4 0.000000122178503812224 +10000738 0.001559984873116364 0.000000119991144653525 +10081828 0.0015474376273826532 0.000000119026033770860 +10745355 -6.696847149303117E-4 0.000000111676161466978 +11127199 -1.797397530142132E-5 0.000000107843851808528 +11722580 NULL 0.000000102366543883684 +12649396 NULL 0.000000094866189658384 +13126214 -1.5236685917203544E-5 0.000000091420115503221 +14042667 NULL 0.000000085453852889910 +14943972 -1.3383322720358416E-5 0.000000080299936322150 +16259022 NULL 0.000000073805177211766 +16531556 -1.2098074736582569E-5 0.000000072588448419495 +16596157 NULL 0.000000072305895876979 +17058489 -1.1724367849930905E-5 0.000000070346207099585 +17247320 -4.172242412154468E-4 0.000000069576026884177 +19004427 8.209139901981786E-4 0.000000063143182375349 +19498517 NULL 0.000000061543141973310 +20165679 7.736411950224934E-4 0.000000059507046601307 +20547875 NULL 0.000000058400199534015 +23264783 NULL 0.000000051580107151655 +23475527 6.645644206411213E-4 0.000000051117063314489 +24379905 NULL 0.000000049220864478348 +24514624 -2.935390728407664E-4 0.000000048950373458716 +25154198 -2.860755091456305E-4 0.000000047705754721339 +25245192 -7.922300610745999E-6 0.000000047533803664476 +26610943 NULL 0.000000045094230595286 +27520143 5.668938566198584E-4 0.000000043604424584567 +27818379 NULL 0.000000043136949137115 +28400244 NULL 0.000000042253158106670 +28698999 5.43607810153936E-4 0.000000041813305056389 +28806400 -6.9429015774272385E-6 0.000000041657409464563 +29920877 5.214085135271938E-4 0.000000040105776311303 +33126539 NULL 0.000000036224732079617 +34603086 NULL 
0.000000034678987879867 +35156265 NULL 0.000000034133318769784 +35862260 NULL 0.000000033461360215447 +36123797 -1.992038655294182E-4 0.000000033219099310075 +36341671 -1.980096072082101E-4 0.000000033019945615599 +36413215 -5.4925114412446145E-6 0.000000032955068647468 +36578596 4.2650625518814335E-4 0.000000032806070522772 +36796441 -1.955623914823719E-4 0.000000032611849607955 +39723587 NULL 0.000000030208752296211 +39985709 -1.7996429674411925E-4 0.000000030010722080731 +40018606 NULL 0.000000029986051987918 +41003161 NULL 0.000000029266036342905 +41158231 3.790493328053871E-4 0.000000029155772025285 +41848817 NULL 0.000000028674645689507 +44047567 -1.633688416888043E-4 0.000000027243275434487 +45125678 NULL 0.000000026592398234992 +45180154 NULL 0.000000026560334433566 +45717793 3.4124569399052136E-4 0.000000026247986205283 +46163162 NULL 0.000000025994753132379 +46525838 3.353190543284787E-4 0.000000025792120068853 +48626663 NULL 0.000000024677819244969 +49102701 -1.465499830650864E-4 0.000000024438574163161 +50300445 -1.4306036457530346E-4 0.000000023856647789100 +50929325 -1.412938420055636E-4 0.000000023562063702984 +52422534 -1.3726921327381848E-4 0.000000022890919389742 +52667422 2.9621727070673783E-4 0.000000022784483356713 +52962061 2.945693522010029E-4 0.000000022657728520044 +53695172 NULL 0.000000022348377988248 +54760317 NULL 0.000000021913678841560 +55020655 2.835480602693661E-4 0.000000021809991175132 +56102034 NULL 0.000000021389598815615 +56131313 NULL 0.000000021378441655195 +56838351 -3.5187509222426247E-6 0.000000021112505533456 +56997841 -3.5089048372902406E-6 0.000000021053429023741 +57778807 -1.2454393528755274E-4 0.000000020768860803928 +58080381 NULL 0.000000020661021490200 +58307527 NULL 0.000000020580533281749 +58536385 -1.2293208745295768E-4 0.000000020500070170032 +59347745 NULL 0.000000020219807846111 +60229567 NULL 0.000000019923769334088 +60330397 NULL 0.000000019890470801974 +PREHOOK: query: explain vectorization expression select (cdouble + 200.0) as s1, cbigint / (cdouble + 200.0) as s2, (cdouble + 200.0) / (cdouble + 200.0), cbigint / (cdouble + 200.0), 3 / (cdouble + 200.0), 1.2 / (cdouble + 200.0) from alltypesorc where cdouble >= -500 and cdouble < -199 order by s1, s2 limit 100 PREHOOK: type: QUERY -POSTHOOK: query: -- There are no zeros in the table, but there is -200.0, so use it as zero - -explain +POSTHOOK: query: explain vectorization expression select (cdouble + 200.0) as s1, cbigint / (cdouble + 200.0) as s2, (cdouble + 200.0) / (cdouble + 200.0), cbigint / (cdouble + 200.0), 3 / (cdouble + 200.0), 1.2 / (cdouble + 200.0) from alltypesorc where cdouble >= -500 and cdouble < -199 order by s1, s2 limit 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -327,6 +414,7 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### @@ -335,36 +423,80 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 146792 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: 
true + predicateExpression: FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleScalar(col 5, val -500.0) -> boolean, FilterDoubleColLessDoubleScalar(col 5, val -199.0) -> boolean) -> boolean predicate: ((cdouble >= -500.0) and (cdouble < -199.0)) (type: boolean) - Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 16320 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (cdouble + 200.0) (type: double), (UDFToDouble(cbigint) / (cdouble + 200.0)) (type: double), ((cdouble + 200.0) / (cdouble + 200.0)) (type: double), (3.0 / (cdouble + 200.0)) (type: double), (1.2 / (cdouble + 200.0)) (type: double) outputColumnNames: _col0, _col1, _col2, _col4, _col5 - Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [12, 15, 16, 14, 17] + selectExpressions: DoubleColAddDoubleScalar(col 5, val 200.0) -> 12:double, DoubleColDivideDoubleColumn(col 13, col 14)(children: CastLongToDouble(col 3) -> 13:double, DoubleColAddDoubleScalar(col 5, val 200.0) -> 14:double) -> 15:double, DoubleColDivideDoubleColumn(col 13, col 14)(children: DoubleColAddDoubleScalar(col 5, val 200.0) -> 13:double, DoubleColAddDoubleScalar(col 5, val 200.0) -> 14:double) -> 16:double, DoubleScalarDivideDoubleColumn(val 3.0, col 13)(children: DoubleColAddDoubleScalar(col 5, val 200.0) -> 13:double) -> 14:double, DoubleScalarDivideDoubleColumn(val 1.2, col 13)(children: DoubleColAddDoubleScalar(col 5, val 200.0) -> 13:double) -> 17:double + Statistics: Num rows: 1365 Data size: 65520 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double), _col1 (type: double) sort order: ++ - Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + Statistics: Num rows: 1365 Data size: 65520 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: double), _col4 (type: double), _col5 (type: double) Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: vectorized, uber + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: double), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: double), KEY.reducesinkkey1 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1365 Data size: 
293479 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 1, 3, 4] + Statistics: Num rows: 1365 Data size: 65520 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 100 - Statistics: Num rows: 100 Data size: 21500 Basic stats: COMPLETE Column stats: NONE + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 100 Data size: 4800 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 100 Data size: 21500 Basic stats: COMPLETE Column stats: NONE + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 100 Data size: 4800 Basic stats: COMPLETE Column stats: COMPLETE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 diff --git ql/src/test/results/clientpositive/llap/vectorization_limit.q.out ql/src/test/results/clientpositive/llap/vectorization_limit.q.out index afaf2d6..5700005 100644 --- ql/src/test/results/clientpositive/llap/vectorization_limit.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_limit.q.out @@ -1,26 +1,59 @@ WARNING: Comparing a bigint and a double may result in a loss of precision. -PREHOOK: query: explain SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 +PREHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 PREHOOK: type: QUERY -POSTHOOK: query: explain SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 +POSTHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 183488 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((UDFToDouble(cbigint) < cdouble) and (cint > 0)) (type: boolean) + Statistics: Num rows: 1365 Data size: 20400 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cbigint (type: bigint), cdouble (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1365 Data size: 16320 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 7 + Statistics: Num rows: 7 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Stage: Stage-0 Fetch Operator limit: 7 Processor Tree: - TableScan - alias: alltypesorc - Filter Operator - predicate: ((UDFToDouble(cbigint) < cdouble) and (cint > 0)) (type: boolean) - Select Operator - expressions: cbigint (type: bigint), cdouble (type: double) - outputColumnNames: _col0, _col1 - Limit - Number of rows: 7 - ListSink + ListSink WARNING: Comparing a bigint and a double may result in a loss of precision. PREHOOK: query: SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 @@ -38,16 +71,16 @@ POSTHOOK: Input: default@alltypesorc -1887561756 -8881.0 -1887561756 -2281.0 -1887561756 9531.0 -PREHOOK: query: -- HIVE-3562 Some limit can be pushed down to map stage - c/p parts from limit_pushdown - -explain +PREHOOK: query: explain vectorization expression select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20 PREHOOK: type: QUERY -POSTHOOK: query: -- HIVE-3562 Some limit can be pushed down to map stage - c/p parts from limit_pushdown - -explain +POSTHOOK: query: explain vectorization expression select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -55,6 +88,7 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### @@ -63,37 +97,79 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 146796 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: ctinyint is not null (type: boolean) - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint), cdouble (type: double), csmallint (type: smallint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 5, 1] + Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: double) sort order: ++ - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + 
nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 value expressions: _col2 (type: smallint) Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: vectorized, uber + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: smallint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] + Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 - Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 20 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 20 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 @@ -130,14 +206,16 @@ POSTHOOK: Input: default@alltypesorc -64 -2919.0 -2919 -64 -1600.0 -1600 -64 -200.0 -200 -PREHOOK: query: -- deduped RS -explain +PREHOOK: query: explain vectorization expression select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20 PREHOOK: type: QUERY -POSTHOOK: query: -- deduped RS -explain +POSTHOOK: query: explain vectorization expression select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -145,6 +223,7 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### @@ -153,43 +232,73 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 
11] Select Operator expressions: ctinyint (type: tinyint), (cdouble + 1.0) (type: double) outputColumnNames: _col0, _col1 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 12] + selectExpressions: DoubleColAddDoubleScalar(col 5, val 1.0) -> 12:double + Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: avg(_col1) + Group By Vectorization: + aggregators: VectorUDAFAvgDouble(col 12) -> struct + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: col 0 + native: false + projectedOutputColumns: [0] + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgDouble(col 12) -> struct output type STRUCT requires PRIMITIVE IS false keys: _col0 (type: tinyint) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 95 Data size: 7888 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 95 Data size: 7888 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 value expressions: _col1 (type: struct) Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: uber + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0) keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 95 Data size: 1048 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 - Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 @@ -226,14 +335,16 @@ NULL 9370.0945309795 -48 1672.909090909091 -47 -574.6428571428571 -46 3033.55 -PREHOOK: query: -- distincts -explain +PREHOOK: query: explain vectorization expression select distinct(ctinyint) from alltypesorc limit 20 PREHOOK: 
type: QUERY -POSTHOOK: query: -- distincts -explain +POSTHOOK: query: explain vectorization expression select distinct(ctinyint) from alltypesorc limit 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -241,6 +352,7 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### @@ -249,40 +361,86 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: ctinyint (type: tinyint) outputColumnNames: ctinyint - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] + Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: ctinyint (type: tinyint) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 95 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false + Statistics: Num rows: 95 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: vectorized, uber + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 95 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE Limit 
Number of rows: 20 - Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 20 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 20 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 @@ -319,12 +477,16 @@ NULL -48 -47 -46 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -332,6 +494,7 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### @@ -340,45 +503,98 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: ctinyint (type: tinyint), cdouble (type: double) outputColumnNames: ctinyint, cdouble - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 5] + Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 5 + native: false + projectedOutputColumns: [] keys: ctinyint (type: tinyint), cdouble (type: double) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 55052 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: double) sort order: ++ Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + 
nativeConditionsNotMet: Uniform Hash IS false + Statistics: Num rows: 6144 Data size: 55052 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: uber + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1 + native: false + projectedOutputColumns: [] keys: KEY._col0 (type: tinyint), KEY._col1 (type: double) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 55052 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col1) + Group By Vectorization: + aggregators: VectorUDAFCount(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0] keys: _col0 (type: tinyint) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 95 Data size: 1048 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 - Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 @@ -415,14 +631,16 @@ NULL 2932 -48 29 -47 22 -46 24 -PREHOOK: query: -- limit zero -explain +PREHOOK: query: explain vectorization expression select ctinyint,cdouble from alltypesorc order by ctinyint limit 0 PREHOOK: type: QUERY -POSTHOOK: query: -- limit zero -explain +POSTHOOK: query: explain vectorization expression select ctinyint,cdouble from alltypesorc order by ctinyint limit 0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -441,14 +659,16 @@ POSTHOOK: query: select ctinyint,cdouble from alltypesorc order by ctinyint limi POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -PREHOOK: query: -- 2MR (applied to last RS) -explain +PREHOOK: query: explain vectorization expression 
select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20 PREHOOK: type: QUERY -POSTHOOK: query: -- 2MR (applied to last RS) -explain +POSTHOOK: query: explain vectorization expression select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -456,6 +676,7 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) @@ -465,57 +686,124 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: ctinyint is not null (type: boolean) - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9173 Data size: 82188 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cdouble (type: double), ctinyint (type: tinyint) outputColumnNames: cdouble, ctinyint - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [5, 0] + Statistics: Num rows: 9173 Data size: 82188 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 5 + native: false + projectedOutputColumns: [0] keys: cdouble (type: double) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3185 Data size: 44512 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) sort order: + Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 3185 Data size: 44512 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce 
Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0] keys: KEY._col0 (type: double) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3185 Data size: 44512 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: bigint), _col0 (type: double) sort order: ++ - Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + Statistics: Num rows: 3185 Data size: 44512 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 Reducer 3 - Execution mode: vectorized, uber + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: double), KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0] + Statistics: Num rows: 3185 Data size: 44512 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 - Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 20 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 20 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 diff --git ql/src/test/results/clientpositive/llap/vectorization_offset_limit.q.out ql/src/test/results/clientpositive/llap/vectorization_offset_limit.q.out index d5a1f47..8be0cac 100644 --- ql/src/test/results/clientpositive/llap/vectorization_offset_limit.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_offset_limit.q.out @@ -118,7 +118,7 @@ STAGE PLANS: 
                          Reduce Sink Vectorization:
                              className: VectorReduceSinkOperator
                              native: false
-                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                              nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false
                          Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE
                          TopN Hash Memory Usage: 0.1
diff --git ql/src/test/results/clientpositive/llap/vectorized_mapjoin2.q.out ql/src/test/results/clientpositive/llap/vectorized_mapjoin2.q.out
index 7621cbe..fbe0b6c 100644
--- ql/src/test/results/clientpositive/llap/vectorized_mapjoin2.q.out
+++ ql/src/test/results/clientpositive/llap/vectorized_mapjoin2.q.out
@@ -103,7 +103,7 @@ STAGE PLANS:
                        Reduce Sink Vectorization:
                            className: VectorReduceSinkOperator
                            native: false
-                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                            nativeConditionsNotMet: Uniform Hash IS false
                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col0 (type: bigint)
@@ -147,7 +147,7 @@ STAGE PLANS:
                          Reduce Sink Vectorization:
                              className: VectorReduceSinkLongOperator
                              native: true
-                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                          Statistics: Num rows: 45 Data size: 181 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
diff --git ql/src/test/results/clientpositive/spark/groupby_grouping_id2.q.out ql/src/test/results/clientpositive/spark/groupby_grouping_id2.q.out
index be5db9c..d7f83d5 100644
--- ql/src/test/results/clientpositive/spark/groupby_grouping_id2.q.out
+++ ql/src/test/results/clientpositive/spark/groupby_grouping_id2.q.out
@@ -22,6 +22,7 @@ POSTHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key,
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 #### A masked pattern was here ####
+key	value	grouping__id	_c3
 1	1	0	1
 1	NULL	0	1
 1	NULL	1	2
@@ -41,6 +42,7 @@ POSTHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY ROLL
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 #### A masked pattern was here ####
+key	value	grouping__id	_c3
 1	1	0	1
 1	NULL	0	1
 1	NULL	1	2
@@ -70,6 +72,7 @@ GROUP BY GROUPING__ID
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 #### A masked pattern was here ####
+grouping__id	_c1
 0	6
 1	4
 3	1
@@ -91,6 +94,7 @@ GROUP BY GROUPING__ID
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 #### A masked pattern was here ####
+grouping__id	_c1
 0	6
 1	4
 3	1
@@ -108,6 +112,7 @@ ON t1.GROUPING__ID = t2.GROUPING__ID
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 #### A masked pattern was here ####
+t1.grouping__id	t2.grouping__id
 0	0
 0	0
 0	0
@@ -175,6 +180,7 @@ ON t1.GROUPING__ID = t2.GROUPING__ID
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 #### A masked pattern was here ####
+t1.grouping__id	t2.grouping__id
 0	0
 0	0
 0	0
@@ -236,6 +242,7 @@ POSTHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key,
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 #### A masked pattern was here ####
+key	value	grouping__id	_c3
 1	1	0	1
 1	NULL	0	1
 1	NULL	1	2
@@ -265,6 +272,7 @@ GROUP BY GROUPING__ID
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 #### A masked pattern was here ####
+grouping__id	_c1
 0	6
 1	4
 3	1
@@ -282,6 +290,7 @@ ON t1.GROUPING__ID = t2.GROUPING__ID
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 #### A masked pattern was here ####
+t1.grouping__id	t2.grouping__id
 0	0
 0	0
 0	0
diff --git ql/src/test/results/clientpositive/spark/vectorization_15.q.out ql/src/test/results/clientpositive/spark/vectorization_15.q.out
index 1d925c5..e35c87d 100644
--- ql/src/test/results/clientpositive/spark/vectorization_15.q.out
+++ ql/src/test/results/clientpositive/spark/vectorization_15.q.out
@@ -236,15 +236,15 @@ POSTHOOK: Input: default@alltypesorc
 -51.0	true	NULL	QiOcvR0kt6r7f0R7fiPxQTCU	-51	266531954	1969-12-31 16:00:08.451	0.0	-266531980.28	NULL	NULL	33.0	0.0	0.0	NULL	51	0.0	2.66532E8	-23	266531980.28	0.0
 -51.0	true	NULL	Ybpj38RTTYl7CnJXPNx1g4C	-51	-370919370	1969-12-31 16:00:08.451	0.0	370919343.72	NULL	NULL	33.0	0.0	0.0	NULL	51	0.0	-3.70919296E8	-23	-370919343.72	0.0
 -6.0	NULL	-200.0	NULL	-6	NULL	1969-12-31 15:59:56.094	0.0	NULL	-200.0	-15910.599999999999	3.0	0.0	0.0	-23.0	6	NULL	NULL	-5	NULL	NULL
--62.0	NULL	15601.0	NULL	-62	NULL	1969-12-31 16:00:09.889	0.0	NULL	15601.0	1241106.353	33.0	0.0	0.0	-23.0	62	NULL	NULL	-23	NULL	NULL
+-62.0	NULL	15601.0	NULL	-62	NULL	1969-12-31 15:59:56.527	0.0	NULL	15601.0	1241106.353	33.0	0.0	0.0	-23.0	62	NULL	NULL	-23	NULL	NULL
 11.0	false	NULL	10pO8p1LNx4Y	11	271296824	1969-12-31 16:00:02.351	0.0	-271296850.28	NULL	NULL	0.0	0.0	0.0	NULL	-11	0.0	2.71296832E8	-1	271296850.28	0.0
 11.0	false	NULL	1H6wGP	11	-560827082	1969-12-31 16:00:02.351	0.0	560827055.72	NULL	NULL	0.0	0.0	0.0	NULL	-11	0.0	-5.6082707E8	-1	-560827055.72	0.0
 11.0	false	NULL	2a7V63IL7jK3o	11	-325931647	1969-12-31 16:00:02.351	0.0	325931620.72	NULL	NULL	0.0	0.0	0.0	NULL	-11	0.0	-3.25931648E8	-1	-325931620.72	0.0
 11.0	true	NULL	10	11	92365813	1969-12-31 16:00:02.351	0.0	-92365839.28	NULL	NULL	0.0	0.0	0.0	NULL	-11	0.0	9.2365808E7	-1	92365839.28	0.0
-21.0	NULL	15601.0	NULL	21	NULL	1969-12-31 16:00:14.256	0.0	NULL	15601.0	1241106.353	12.0	0.0	0.0	-23.0	-21	NULL	NULL	-2	NULL	NULL
+21.0	NULL	15601.0	NULL	21	NULL	1969-12-31 15:59:56.527	0.0	NULL	15601.0	1241106.353	12.0	0.0	0.0	-23.0	-21	NULL	NULL	-2	NULL	NULL
 32.0	NULL	-200.0	NULL	32	NULL	1969-12-31 16:00:02.445	0.0	NULL	-200.0	-15910.599999999999	1.0	0.0	0.0	-23.0	-32	NULL	NULL	-23	NULL	NULL
 36.0	NULL	-200.0	NULL	36	NULL	1969-12-31 16:00:00.554	0.0	NULL	-200.0	-15910.599999999999	33.0	0.0	0.0	-23.0	-36	NULL	NULL	-23	NULL	NULL
-5.0	NULL	15601.0	NULL	5	NULL	1969-12-31 16:00:00.959	0.0	NULL	15601.0	1241106.353	3.0	0.0	0.0	-23.0	-5	NULL	NULL	-3	NULL	NULL
+5.0	NULL	15601.0	NULL	5	NULL	1969-12-31 15:59:56.527	0.0	NULL	15601.0	1241106.353	3.0	0.0	0.0	-23.0	-5	NULL	NULL	-3	NULL	NULL
 58.0	NULL	15601.0	NULL	58	NULL	1969-12-31 15:59:56.527	0.0	NULL	15601.0	1241106.353	33.0	0.0	0.0	-23.0	-58	NULL	NULL	-23	NULL	NULL
 8.0	false	NULL	10V3pN5r5lI2qWl2lG103	8	-362835731	1969-12-31 16:00:15.892	0.0	362835704.72	NULL	NULL	1.0	0.0	0.0	NULL	-8	0.0	-3.62835744E8	-7	-362835704.72	0.0
 8.0	false	NULL	10c4qt584m5y6uWT	8	-183000142	1969-12-31 16:00:15.892	0.0	183000115.72	NULL	NULL	1.0	0.0	0.0	NULL	-8	0.0	-1.8300016E8	-7	-183000115.72	0.0
diff --git ql/src/test/results/clientpositive/vector_count.q.out ql/src/test/results/clientpositive/vector_count.q.out
index 23d5099..3de9ab1 100644
--- ql/src/test/results/clientpositive/vector_count.q.out
+++ ql/src/test/results/clientpositive/vector_count.q.out
@@ -62,26 +62,12 @@ STAGE PLANS:
           TableScan
             alias: abcd
             Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE
-            TableScan Vectorization:
-                native: true
-                projectedOutputColumns: [0, 1, 2, 3]
             Select Operator
               expressions: a (type: int), b (type: int), c (type: int), d (type: int)
               outputColumnNames: a, b, c, d
-              Select Vectorization:
-                  className: VectorSelectOperator
-                  native: true
-                  projectedOutputColumns: [0, 1, 2, 3]
               Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
                 aggregations: count(DISTINCT b), count(DISTINCT c), sum(d)
-                Group By Vectorization:
-                    aggregators: VectorUDAFCount(col 1) -> bigint, VectorUDAFCount(col 2) -> bigint, VectorUDAFSumLong(col 3) -> bigint
-                    className: VectorGroupByOperator
-                    vectorOutput: true
-                    keyExpressions: col 0, col 1, col 2
-                    native: false
-                    projectedOutputColumns: [0, 1, 2]
                 keys: a (type: int), b (type: int), c (type: int)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
@@ -90,22 +76,14 @@ STAGE PLANS:
                   key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
                   sort order: +++
                   Map-reduce partition columns: _col0 (type: int)
-                  Reduce Sink Vectorization:
-                      className: VectorReduceSinkOperator
-                      native: false
-                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No buckets IS true, No TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false, No DISTINCT columns IS false
                   Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col5 (type: bigint)
-      Execution mode: vectorized
       Map Vectorization:
          enabled: true
          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
-          groupByVectorOutput: true
          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-          allNative: false
-          usesVectorUDFAdaptor: false
-          vectorized: true
+          notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported
+          vectorized: false
       Reduce Vectorization:
          enabled: false
          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
@@ -166,26 +144,12 @@ STAGE PLANS:
           TableScan
             alias: abcd
             Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE
-            TableScan Vectorization:
-                native: true
-                projectedOutputColumns: [0, 1, 2, 3]
             Select Operator
               expressions: a (type: int), b (type: int), c (type: int), d (type: int)
               outputColumnNames: _col1, _col2, _col3, _col4
-              Select Vectorization:
-                  className: VectorSelectOperator
-                  native: true
-                  projectedOutputColumns: [0, 1, 2, 3]
               Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
                 aggregations: count(1), count(), count(_col1), count(_col2), count(_col3), count(_col4), count(DISTINCT _col1), count(DISTINCT _col2), count(DISTINCT _col3), count(DISTINCT _col4), count(DISTINCT _col1, _col2), count(DISTINCT _col2, _col3), count(DISTINCT _col3, _col4), count(DISTINCT _col1, _col4), count(DISTINCT _col1, _col3), count(DISTINCT _col2, _col4), count(DISTINCT _col1, _col2, _col3), count(DISTINCT _col2, _col3, _col4), count(DISTINCT _col1, _col3, _col4), count(DISTINCT _col1, _col2, _col4), count(DISTINCT _col1, _col2, _col3, _col4)
-                Group By Vectorization:
-                    aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 4:long) -> bigint, VectorUDAFCountStar(*) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 1) -> bigint, VectorUDAFCount(col 2) -> bigint, VectorUDAFCount(col 3) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 1) -> bigint, VectorUDAFCount(col 2) -> bigint, VectorUDAFCount(col 3) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 1) -> bigint, VectorUDAFCount(col 2) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 1) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 1) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 0) -> bigint
-                    className: VectorGroupByOperator
-                    vectorOutput: true
-                    keyExpressions: col 0, col 1, col 2, col 3
-                    native: false
-                    projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
                 keys: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
@@ -193,22 +157,14 @@
                Reduce Output Operator
                  key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int)
                  sort order: ++++
-                  Reduce Sink Vectorization:
-                      className: VectorReduceSinkOperator
-                      native: false
-                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No buckets IS true, No TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false, No DISTINCT columns IS false
                  Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: bigint), _col9 (type: bigint)
-      Execution mode: vectorized
       Map Vectorization:
          enabled: true
          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
-          groupByVectorOutput: true
          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-          allNative: false
-          usesVectorUDFAdaptor: false
-          vectorized: true
+          notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported
+          vectorized: false
       Reduce Vectorization:
          enabled: false
          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
diff --git ql/src/test/results/clientpositive/vector_grouping_sets.q.out ql/src/test/results/clientpositive/vector_grouping_sets.q.out
index bd95aa4..5c9909e 100644
--- ql/src/test/results/clientpositive/vector_grouping_sets.q.out
+++ ql/src/test/results/clientpositive/vector_grouping_sets.q.out
@@ -148,11 +148,24 @@ STAGE PLANS:
           TableScan
             alias: store
             Statistics: Num rows: 12 Data size: 25632 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]
             Select Operator
               expressions: s_store_id (type: string)
               outputColumnNames: s_store_id
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [1]
               Statistics: Num rows: 12 Data size: 25632 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
+                Group By Vectorization:
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    keyExpressions: col 1, ConstantVectorExpression(val 0) -> 29:long
+                    native: false
+                    projectedOutputColumns: []
                 keys: s_store_id (type: string), 0 (type: int)
                 mode: hash
                 outputColumnNames: _col0, _col1
@@ -161,13 +174,21 @@
                  key expressions: _col0 (type: string), _col1 (type: int)
                  sort order: ++
                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                  Statistics: Num rows: 24 Data size: 51264 Basic stats: COMPLETE Column stats: NONE
+      Execution mode: vectorized
       Map Vectorization:
          enabled: true
          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-          notVectorizedReason: GROUPBY operator: Grouping sets not supported
-          vectorized: false
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
       Reduce Vectorization:
          enabled: false
          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
@@ -241,11 +262,24 @@ STAGE PLANS:
           TableScan
             alias: store
             Statistics: Num rows: 12 Data size: 25632 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]
             Select Operator
               expressions: s_store_id (type: string)
               outputColumnNames: _col0
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [1]
               Statistics: Num rows: 12 Data size: 25632 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
+                Group By Vectorization:
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    keyExpressions: col 1, ConstantVectorExpression(val 0) -> 29:long
+                    native: false
+                    projectedOutputColumns: []
                 keys: _col0 (type: string), 0 (type: int)
                 mode: hash
                 outputColumnNames: _col0, _col1
@@ -254,13 +288,21 @@
                  key expressions: _col0 (type: string), _col1 (type: int)
                  sort order: ++
                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                  Statistics: Num rows: 24 Data size: 51264 Basic stats: COMPLETE Column stats: NONE
+      Execution mode: vectorized
       Map Vectorization:
          enabled: true
          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-          notVectorizedReason: GROUPBY operator: Grouping sets not supported
-          vectorized: false
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
       Reduce Vectorization:
          enabled: false
          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
@@ -347,6 +389,7 @@ STAGE PLANS:
                  sort order: ++
                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
                  Statistics: Num rows: 24 Data size: 51264 Basic stats: COMPLETE Column stats: NONE
+      Execution mode: vectorized
       Reduce Operator Tree:
        Group By Operator
          keys: KEY._col0 (type: string), KEY._col1 (type: int)
diff --git ql/src/test/results/clientpositive/vectorization_15.q.out ql/src/test/results/clientpositive/vectorization_15.q.out
index 0195f2f..35667db 100644
--- ql/src/test/results/clientpositive/vectorization_15.q.out
+++ ql/src/test/results/clientpositive/vectorization_15.q.out
@@ -239,15 +239,15 @@ POSTHOOK: Input: default@alltypesorc
 -51.0	true	NULL	QiOcvR0kt6r7f0R7fiPxQTCU	-51	266531954	1969-12-31 16:00:08.451	0.0	-266531980.28	NULL	NULL	33.0	0.0	0.0	NULL	51	0.0	2.66532E8	-23	266531980.28	0.0
 -51.0	true	NULL	Ybpj38RTTYl7CnJXPNx1g4C	-51	-370919370	1969-12-31 16:00:08.451	0.0	370919343.72	NULL	NULL	33.0	0.0	0.0	NULL	51	0.0	-3.70919296E8	-23	-370919343.72	0.0
 -6.0	NULL	-200.0	NULL	-6	NULL	1969-12-31 15:59:56.094	0.0	NULL	-200.0	-15910.599999999999	3.0	0.0	0.0	-23.0	6	NULL	NULL	-5	NULL	NULL
--62.0	NULL	15601.0	NULL	-62	NULL	1969-12-31 16:00:09.889	0.0	NULL	15601.0	1241106.353	33.0	0.0	0.0	-23.0	62	NULL	NULL	-23	NULL	NULL
+-62.0	NULL	15601.0	NULL	-62	NULL	1969-12-31 15:59:56.527	0.0	NULL	15601.0	1241106.353	33.0	0.0	0.0	-23.0	62	NULL	NULL	-23	NULL	NULL
 11.0	false	NULL	10pO8p1LNx4Y	11	271296824	1969-12-31 16:00:02.351	0.0	-271296850.28	NULL	NULL	0.0	0.0	0.0	NULL	-11	0.0	2.71296832E8	-1	271296850.28	0.0
 11.0	false	NULL	1H6wGP	11	-560827082	1969-12-31 16:00:02.351	0.0	560827055.72	NULL	NULL	0.0	0.0	0.0	NULL	-11	0.0	-5.6082707E8	-1	-560827055.72	0.0
 11.0	false	NULL	2a7V63IL7jK3o	11	-325931647	1969-12-31 16:00:02.351	0.0	325931620.72	NULL	NULL	0.0	0.0	0.0	NULL	-11	0.0	-3.25931648E8	-1	-325931620.72	0.0
 11.0	true	NULL	10	11	92365813	1969-12-31 16:00:02.351	0.0	-92365839.28	NULL	NULL	0.0	0.0	0.0	NULL	-11	0.0	9.2365808E7	-1	92365839.28	0.0
-21.0	NULL	15601.0	NULL	21	NULL	1969-12-31 16:00:14.256	0.0	NULL	15601.0	1241106.353	12.0	0.0	0.0	-23.0	-21	NULL	NULL	-2	NULL	NULL
+21.0	NULL	15601.0	NULL	21	NULL	1969-12-31 15:59:56.527	0.0	NULL	15601.0	1241106.353	12.0	0.0	0.0	-23.0	-21	NULL	NULL	-2	NULL	NULL
 32.0	NULL	-200.0	NULL	32	NULL	1969-12-31 16:00:02.445	0.0	NULL	-200.0	-15910.599999999999	1.0	0.0	0.0	-23.0	-32	NULL	NULL	-23	NULL	NULL
 36.0	NULL	-200.0	NULL	36	NULL	1969-12-31 16:00:00.554	0.0	NULL	-200.0	-15910.599999999999	33.0	0.0	0.0	-23.0	-36	NULL	NULL	-23	NULL	NULL
-5.0	NULL	15601.0	NULL	5	NULL	1969-12-31 16:00:00.959	0.0	NULL	15601.0	1241106.353	3.0	0.0	0.0	-23.0	-5	NULL	NULL	-3	NULL	NULL
+5.0	NULL	15601.0	NULL	5	NULL	1969-12-31 15:59:56.527	0.0	NULL	15601.0	1241106.353	3.0	0.0	0.0	-23.0	-5	NULL	NULL	-3	NULL	NULL
 58.0	NULL	15601.0	NULL	58	NULL	1969-12-31 15:59:56.527	0.0	NULL	15601.0	1241106.353	33.0	0.0	0.0	-23.0	-58	NULL	NULL	-23	NULL	NULL
 8.0	false	NULL	10V3pN5r5lI2qWl2lG103	8	-362835731	1969-12-31 16:00:15.892	0.0	362835704.72	NULL	NULL	1.0	0.0	0.0	NULL	-8	0.0	-3.62835744E8	-7	-362835704.72	0.0
 8.0	false	NULL	10c4qt584m5y6uWT	8	-183000142	1969-12-31 16:00:15.892	0.0	183000115.72	NULL	NULL	1.0	0.0	0.0	NULL	-8	0.0	-1.8300016E8	-7	-183000115.72	0.0
diff --git ql/src/test/results/clientpositive/vectorization_limit.q.out ql/src/test/results/clientpositive/vectorization_limit.q.out
index 17cdd51..dc29ee3 100644
--- ql/src/test/results/clientpositive/vectorization_limit.q.out
+++ ql/src/test/results/clientpositive/vectorization_limit.q.out
@@ -446,26 +446,12 @@ STAGE PLANS:
           TableScan
             alias: alltypesorc
             Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
-            TableScan Vectorization:
-                native: true
-                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
             Select Operator
              expressions: ctinyint (type: tinyint), cdouble (type: double)
              outputColumnNames: ctinyint, cdouble
-              Select Vectorization:
-                  className: VectorSelectOperator
-                  native: true
-                  projectedOutputColumns: [0, 5]
              Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                aggregations: count(DISTINCT cdouble)
-                Group By Vectorization:
-                    aggregators: VectorUDAFCount(col 5) -> bigint
-                    className: VectorGroupByOperator
-                    vectorOutput: true
-                    keyExpressions: col 0, col 5
-                    native: false
-                    projectedOutputColumns: [0]
                keys: ctinyint (type: tinyint), cdouble (type: double)
                mode: hash
                outputColumnNames: _col0, _col1, _col2
@@ -474,22 +460,14 @@
                  key expressions: _col0 (type: tinyint), _col1 (type: double)
                  sort order: ++
                  Map-reduce partition columns: _col0 (type: tinyint)
-                  Reduce Sink Vectorization:
-                      className: VectorReduceSinkOperator
-                      native: false
-                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No buckets IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false, No DISTINCT columns IS false
                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                  TopN Hash Memory Usage: 0.3
-      Execution mode: vectorized
       Map Vectorization:
          enabled: true
          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
-          groupByVectorOutput: true
          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-          allNative: false
-          usesVectorUDFAdaptor: false
-          vectorized: true
+          notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported
+          vectorized: false
       Reduce Vectorization:
          enabled: false
          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
diff --git ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out
index 81292ec..37b72cc 100644
--- ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out
+++ ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out
@@ -50,15 +50,12 @@ STAGE PLANS:
                  key expressions: _col0 (type: int)
                  sort order: +
                  Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE
-      Execution mode: vectorized
       Map Vectorization:
          enabled: true
          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
-          groupByVectorOutput: true
          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-          allNative: false
-          usesVectorUDFAdaptor: false
-          vectorized: true
+          notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported
+          vectorized: false
       Reduce Vectorization:
          enabled: false
          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
@@ -125,15 +122,12 @@ STAGE PLANS:
                  key expressions: _col0 (type: int)
                  sort order: +
                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
-      Execution mode: vectorized
       Map Vectorization:
          enabled: true
          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
-          groupByVectorOutput: false
          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-          allNative: false
-          usesVectorUDFAdaptor: false
-          vectorized: true
+          notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported
+          vectorized: false
       Reduce Vectorization:
          enabled: false
          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true