diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index e06e643..58e91c5 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -602,6 +602,19 @@ minillaplocal.query.files=acid_globallimit.q,\ vector_auto_smb_mapjoin_14.q,\ vector_decimal_2.q,\ vector_decimal_udf.q,\ + vector_groupby_cube1.q,\ + vector_groupby_grouping_id1.q,\ + vector_groupby_grouping_id2.q,\ + vector_groupby_grouping_id3.q,\ + vector_groupby_grouping_sets1.q,\ + vector_groupby_grouping_sets2.q,\ + vector_groupby_grouping_sets3.q,\ + vector_groupby_grouping_sets4.q,\ + vector_groupby_grouping_sets5.q,\ + vector_groupby_grouping_sets6.q,\ + vector_groupby_grouping_sets_grouping.q,\ + vector_groupby_grouping_sets_limit.q,\ + vector_groupby_grouping_window.q,\ vector_join30.q,\ vector_join_filters.q,\ vector_leftsemi_mapjoin.q,\ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSetInfo.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSetInfo.java index 935b47b..bedaf1b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSetInfo.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSetInfo.java @@ -20,10 +20,8 @@ import java.util.Arrays; -import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; /** * Class to keep information on a set of typed vector columns. Used by @@ -64,147 +62,85 @@ */ protected int[] intervalDayTimeIndices; - /** - * Helper class for looking up a key value based on key index. - */ - public class KeyLookupHelper { - public int longIndex; - public int doubleIndex; - public int stringIndex; - public int decimalIndex; - public int timestampIndex; - public int intervalDayTimeIndex; - - private static final int INDEX_UNUSED = -1; - - private void resetIndices() { - this.longIndex = this.doubleIndex = this.stringIndex = this.decimalIndex = - timestampIndex = intervalDayTimeIndex = INDEX_UNUSED; - } - public void setLong(int index) { - resetIndices(); - this.longIndex= index; - } - - public void setDouble(int index) { - resetIndices(); - this.doubleIndex = index; - } - - public void setString(int index) { - resetIndices(); - this.stringIndex = index; - } + final protected int keyCount; + private int addKeyIndex; - public void setDecimal(int index) { - resetIndices(); - this.decimalIndex = index; - } + private int addLongIndex; + private int addDoubleIndex; + private int addStringIndex; + private int addDecimalIndex; + private int addTimestampIndex; + private int addIntervalDayTimeIndex; - public void setTimestamp(int index) { - resetIndices(); - this.timestampIndex= index; - } - - public void setIntervalDayTime(int index) { - resetIndices(); - this.intervalDayTimeIndex= index; - } - } - - /** - * Lookup vector to map from key index to primitive type index. - */ - protected KeyLookupHelper[] indexLookup; - - private int keyCount; - private int addIndex; - - protected int longIndicesIndex; - protected int doubleIndicesIndex; - protected int stringIndicesIndex; - protected int decimalIndicesIndex; - protected int timestampIndicesIndex; - protected int intervalDayTimeIndicesIndex; + // Given the keyIndex these arrays return: + // The ColumnVector.Type, + // The type specific index into longIndices, doubleIndices, etc... + protected ColumnVector.Type[] columnVectorTypes; + protected int[] columnTypeSpecificIndices; protected VectorColumnSetInfo(int keyCount) { this.keyCount = keyCount; - this.addIndex = 0; + this.addKeyIndex = 0; // We'll over allocate and then shrink the array for each type longIndices = new int[this.keyCount]; - longIndicesIndex = 0; + addLongIndex = 0; doubleIndices = new int[this.keyCount]; - doubleIndicesIndex = 0; + addDoubleIndex = 0; stringIndices = new int[this.keyCount]; - stringIndicesIndex = 0; + addStringIndex = 0; decimalIndices = new int[this.keyCount]; - decimalIndicesIndex = 0; + addDecimalIndex = 0; timestampIndices = new int[this.keyCount]; - timestampIndicesIndex = 0; + addTimestampIndex = 0; intervalDayTimeIndices = new int[this.keyCount]; - intervalDayTimeIndicesIndex = 0; - indexLookup = new KeyLookupHelper[this.keyCount]; - } - - protected void addKey(String outputType) throws HiveException { - indexLookup[addIndex] = new KeyLookupHelper(); + addIntervalDayTimeIndex = 0; - String typeName = VectorizationContext.mapTypeNameSynonyms(outputType); + columnVectorTypes = new ColumnVector.Type[this.keyCount]; + columnTypeSpecificIndices = new int[this.keyCount]; + } - TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName); - Type columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo); + protected void addKey(ColumnVector.Type columnVectorType) throws HiveException { switch (columnVectorType) { case LONG: - longIndices[longIndicesIndex] = addIndex; - indexLookup[addIndex].setLong(longIndicesIndex); - ++longIndicesIndex; + longIndices[addLongIndex] = addKeyIndex; + columnTypeSpecificIndices[addKeyIndex] = addLongIndex++; break; - case DOUBLE: - doubleIndices[doubleIndicesIndex] = addIndex; - indexLookup[addIndex].setDouble(doubleIndicesIndex); - ++doubleIndicesIndex; + doubleIndices[addDoubleIndex] = addKeyIndex; + columnTypeSpecificIndices[addKeyIndex] = addDoubleIndex++; break; - case BYTES: - stringIndices[stringIndicesIndex]= addIndex; - indexLookup[addIndex].setString(stringIndicesIndex); - ++stringIndicesIndex; + stringIndices[addStringIndex]= addKeyIndex; + columnTypeSpecificIndices[addKeyIndex] = addStringIndex++; break; - case DECIMAL: - decimalIndices[decimalIndicesIndex]= addIndex; - indexLookup[addIndex].setDecimal(decimalIndicesIndex); - ++decimalIndicesIndex; + decimalIndices[addDecimalIndex]= addKeyIndex; + columnTypeSpecificIndices[addKeyIndex] = addDecimalIndex++; break; - case TIMESTAMP: - timestampIndices[timestampIndicesIndex] = addIndex; - indexLookup[addIndex].setTimestamp(timestampIndicesIndex); - ++timestampIndicesIndex; + timestampIndices[addTimestampIndex] = addKeyIndex; + columnTypeSpecificIndices[addKeyIndex] = addTimestampIndex++; break; - case INTERVAL_DAY_TIME: - intervalDayTimeIndices[intervalDayTimeIndicesIndex] = addIndex; - indexLookup[addIndex].setIntervalDayTime(intervalDayTimeIndicesIndex); - ++intervalDayTimeIndicesIndex; + intervalDayTimeIndices[addIntervalDayTimeIndex] = addKeyIndex; + columnTypeSpecificIndices[addKeyIndex] = addIntervalDayTimeIndex++; break; - default: throw new HiveException("Unexpected column vector type " + columnVectorType); } - addIndex++; + columnVectorTypes[addKeyIndex] = columnVectorType; + addKeyIndex++; } - protected void finishAdding() { - longIndices = Arrays.copyOf(longIndices, longIndicesIndex); - doubleIndices = Arrays.copyOf(doubleIndices, doubleIndicesIndex); - stringIndices = Arrays.copyOf(stringIndices, stringIndicesIndex); - decimalIndices = Arrays.copyOf(decimalIndices, decimalIndicesIndex); - timestampIndices = Arrays.copyOf(timestampIndices, timestampIndicesIndex); - intervalDayTimeIndices = Arrays.copyOf(intervalDayTimeIndices, intervalDayTimeIndicesIndex); + protected void finishAdding() throws HiveException { + longIndices = Arrays.copyOf(longIndices, addLongIndex); + doubleIndices = Arrays.copyOf(doubleIndices, addDoubleIndex); + stringIndices = Arrays.copyOf(stringIndices, addStringIndex); + decimalIndices = Arrays.copyOf(decimalIndices, addDecimalIndex); + timestampIndices = Arrays.copyOf(timestampIndices, addTimestampIndex); + intervalDayTimeIndices = Arrays.copyOf(intervalDayTimeIndices, addIntervalDayTimeIndex); } } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java index 2605203..532818c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java @@ -22,16 +22,20 @@ import java.lang.management.MemoryMXBean; import java.lang.ref.SoftReference; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; +import org.apache.commons.lang.ArrayUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.GroupByOperator; import org.apache.hadoop.hive.ql.exec.KeyWrapper; import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory; @@ -51,6 +55,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import javolution.util.FastBitSet; + import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; @@ -107,6 +113,20 @@ private transient VectorAssignRow vectorAssignRow; + private transient boolean groupingSetsPresent; + + // The field bit pattern or "id" for each grouping set. + private transient int[] groupingSets; + + private transient int groupingSetPosition; + + private transient int groupingSetsVectorColumnNum; + + private transient boolean[][] allGroupingSetsOverrideIsNulls; + + private transient boolean currentIsFirstGroupingSet; + private transient boolean[] currentGroupingSetsOverrideIsNulls; + private transient int numEntriesHashTable; private transient long maxHashTblMemory; @@ -327,9 +347,22 @@ public void initialize(Configuration hconf) throws HiveException { @Override public void processBatch(VectorizedRowBatch batch) throws HiveException { + if (!groupingSetsPresent || currentIsFirstGroupingSet) { + + // Evaluate the key expressions once. + currentIsFirstGroupingSet = false; + for(int i = 0; i < keyExpressions.length; ++i) { + keyExpressions[i].evaluate(batch); + } + } + // First we traverse the batch to evaluate and prepare the KeyWrappers // After this the KeyWrappers are properly set and hash code is computed - keyWrappersBatch.evaluateBatch(batch); + if (!groupingSetsPresent) { + keyWrappersBatch.evaluateBatch(batch); + } else { + keyWrappersBatch.evaluateBatchGroupingSets(batch, currentGroupingSetsOverrideIsNulls); + } // Next we locate the aggregation buffer set for each key prepareBatchAggregationBufferSets(batch); @@ -550,6 +583,37 @@ private void checkHashModeEfficiency() throws HiveException { } } + private class ProcessingModeHashGroupingSetsAggregate extends ProcessingModeHashAggregate { + + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { + + // We drive the ProcessingModeHashAggregate logic with the same batch but different + // grouping set id and null variation. + // PERFORMANCE NOTE: We do not try to reuse columns and generate the KeyWrappers anew... + + // NOTE: We are overwriting the constant vector value... + LongColumnVector dummyColVector = (LongColumnVector) batch.cols[groupingSetsVectorColumnNum]; + dummyColVector.isRepeating = true; + dummyColVector.noNulls = true; + dummyColVector.isNull[0] = false; + long[] dummyIdVector = dummyColVector.vector; + + currentIsFirstGroupingSet = true; + final int size = groupingSets.length; + for (int i = 0; i < size; i++) { + + // Set first element of repeating dummy id column. + dummyIdVector[0] = groupingSets[i]; + + // The member currentGroupingSetsOverrideIsNulls will tell our super ProcessingModeHashAggregate + // to null out more columns. + currentGroupingSetsOverrideIsNulls = allGroupingSetsOverrideIsNulls[i]; + super.processBatch(batch); + } + } + } + /** * Streaming processing mode on ALREADY GROUPED data. Each input VectorizedRowBatch may * have a mix of different keys. Intermediate values are flushed each time key changes. @@ -781,6 +845,49 @@ public VectorGroupByOperator(CompilationOpContext ctx) { super(ctx); } + private void setupGroupingSets() { + + LOG.debug("*NEW* keyExpressions " + Arrays.toString(keyExpressions)); + + groupingSets = ArrayUtils.toPrimitive(conf.getListGroupingSets().toArray(new Integer[0])); + LOG.debug("*NEW* groupingSets " + Arrays.toString(groupingSets)); + groupingSetPosition = conf.getGroupingSetPosition(); + LOG.debug("*NEW* groupingSetPosition " + groupingSetPosition); + + allGroupingSetsOverrideIsNulls = new boolean[groupingSets.length][]; + + int pos = 0; + for (int groupingSet: groupingSets) { + + // Create the mapping corresponding to the grouping set + LOG.debug("*NEW* pos " + pos + " groupingSet " + groupingSet); + + // Assume all columns are null, except the dummy column is always non-null. + boolean[] groupingSetsOverrideIsNull = new boolean[keyExpressions.length]; + Arrays.fill(groupingSetsOverrideIsNull, true); + groupingSetsOverrideIsNull[groupingSetPosition] = false; + + // Add keys of this grouping set. + FastBitSet bitset = GroupByOperator.groupingSet2BitSet(groupingSet); + for (int keyPos = bitset.nextSetBit(0); keyPos >= 0; + keyPos = bitset.nextSetBit(keyPos+1)) { + groupingSetsOverrideIsNull[keyPos] = false; + } + + LOG.debug("*NEW* pos " + pos + " groupingSetsOverrideIsNull " + Arrays.toString(groupingSetsOverrideIsNull)); + allGroupingSetsOverrideIsNulls[pos] = groupingSetsOverrideIsNull; + pos++; + } + + // The last key column is the dummy grouping set id. + // + // Figure out which (scratch) column was used so we can overwrite the dummy id. + + VectorExpression dummyIdColVector = keyExpressions[groupingSetPosition]; + groupingSetsVectorColumnNum = dummyIdColVector.getOutputColumn(); + + LOG.debug("*NEW* dummy grouping set id column num " + groupingSetsVectorColumnNum + " outputType " + dummyIdColVector.getOutputType()); + } @Override protected void initializeOp(Configuration hconf) throws HiveException { @@ -834,13 +941,20 @@ protected void initializeOp(Configuration hconf) throws HiveException { forwardCache = new Object[outputKeyLength + aggregators.length]; + groupingSetsPresent = false; switch (conf.getVectorDesc().getProcessingMode()) { case GLOBAL: Preconditions.checkState(outputKeyLength == 0); processingMode = this.new ProcessingModeGlobalAggregate(); break; case HASH: - processingMode = this.new ProcessingModeHashAggregate(); + groupingSetsPresent = conf.isGroupingSetsPresent(); + if (!groupingSetsPresent) { + processingMode = this.new ProcessingModeHashAggregate(); + } else { + setupGroupingSets(); + processingMode = this.new ProcessingModeHashGroupingSetsAggregate(); + } break; case MERGE_PARTIAL: processingMode = this.new ProcessingModeReduceMergePartial(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupKeyHelper.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupKeyHelper.java index 50d0452..3c54784 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupKeyHelper.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupKeyHelper.java @@ -19,8 +19,12 @@ package org.apache.hadoop.hive.ql.exec.vector; import java.io.IOException; + +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.io.DataOutputBuffer; /** @@ -35,7 +39,10 @@ public VectorGroupKeyHelper(int keyCount) { void init(VectorExpression[] keyExpressions) throws HiveException { // Inspect the output type of each key expression. for(int i=0; i < keyExpressions.length; ++i) { - addKey(keyExpressions[i].getOutputType()); + String typeName = VectorizationContext.mapTypeNameSynonyms(keyExpressions[i].getOutputType()); + TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName); + Type columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo); + addKey(columnVectorType); } finishAdding(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java index 2bd1850..fad615b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java @@ -57,15 +57,20 @@ private HiveDecimalWritable[] decimalValues; private Timestamp[] timestampValues; + private static Timestamp ZERO_TIMESTAMP = new Timestamp(0); private HiveIntervalDayTime[] intervalDayTimeValues; + private static HiveIntervalDayTime ZERO_INTERVALDAYTIME= new HiveIntervalDayTime(0, 0); + // NOTE: The null array is index by keyIndex, which is not available internally. private boolean[] isNull; + private int hashcode; public VectorHashKeyWrapper(int longValuesCount, int doubleValuesCount, int byteValuesCount, int decimalValuesCount, int timestampValuesCount, - int intervalDayTimeValuesCount) { + int intervalDayTimeValuesCount, + int keyCount) { longValues = longValuesCount > 0 ? new long[longValuesCount] : EMPTY_LONG_ARRAY; doubleValues = doubleValuesCount > 0 ? new double[doubleValuesCount] : EMPTY_DOUBLE_ARRAY; decimalValues = decimalValuesCount > 0 ? new HiveDecimalWritable[decimalValuesCount] : EMPTY_DECIMAL_ARRAY; @@ -89,8 +94,7 @@ public VectorHashKeyWrapper(int longValuesCount, int doubleValuesCount, for(int i = 0; i < intervalDayTimeValuesCount; ++i) { intervalDayTimeValues[i] = new HiveIntervalDayTime(); } - isNull = new boolean[longValuesCount + doubleValuesCount + byteValuesCount + - decimalValuesCount + timestampValuesCount + intervalDayTimeValuesCount]; + isNull = new boolean[keyCount]; hashcode = 0; } @@ -127,20 +131,19 @@ public void setHashKey() { * Hashing the string is potentially expensive so is better to branch. * Additionally not looking at values for nulls allows us not reset the values. */ - if (!isNull[longValues.length + doubleValues.length + i]) { + if (byteLengths[i] != -1) { byte[] bytes = byteValues[i]; int start = byteStarts[i]; int length = byteLengths[i]; - if (length == bytes.length && start == 0) { - hashcode ^= Arrays.hashCode(bytes); - } - else { + // if (length == bytes.length && start == 0) { + // hashcode ^= Arrays.hashCode(bytes); + // } else { // Unfortunately there is no Arrays.hashCode(byte[], start, length) for(int j = start; j < start + length; ++j) { // use 461 as is a (sexy!) prime. hashcode ^= 461 * bytes[j]; } - } + // } } } } @@ -171,7 +174,7 @@ private boolean bytesEquals(VectorHashKeyWrapper keyThat) { //By the time we enter here the byteValues.lentgh and isNull must have already been compared for (int i = 0; i < byteValues.length; ++i) { // the byte comparison is potentially expensive so is better to branch on null - if (!isNull[longValues.length + doubleValues.length + i]) { + if (byteLengths[i] != -1) { if (!StringExpr.equal( byteValues[i], byteStarts[i], @@ -215,7 +218,7 @@ public void duplicateTo(VectorHashKeyWrapper clone) { for (int i = 0; i < byteValues.length; ++i) { // avoid allocation/copy of nulls, because it potentially expensive. // branch instead. - if (!isNull[longValues.length + doubleValues.length + i]) { + if (byteLengths[i] != -1) { clone.byteValues[i] = Arrays.copyOfRange(byteValues[i], byteStarts[i], byteStarts[i] + byteLengths[i]); } @@ -261,106 +264,138 @@ public void copyKey(KeyWrapper oldWrapper) { throw new UnsupportedOperationException(); } - public void assignDouble(int index, double d) { - doubleValues[index] = d; - isNull[longValues.length + index] = false; + public void assignLong(int index, long v) { + longValues[index] = v; } - public void assignNullDouble(int index) { - doubleValues[index] = 0; // assign 0 to simplify hashcode - isNull[longValues.length + index] = true; + public void assignNullLong(int keyIndex, int index) { + isNull[keyIndex] = true; + longValues[index] = 0; // assign 0 to simplify hashcode } - public void assignLong(int index, long v) { - longValues[index] = v; - isNull[index] = false; + public void assignDouble(int index, double d) { + doubleValues[index] = d; } - public void assignNullLong(int index) { - longValues[index] = 0; // assign 0 to simplify hashcode - isNull[index] = true; + public void assignNullDouble(int keyIndex, int index) { + isNull[keyIndex] = true; + doubleValues[index] = 0; // assign 0 to simplify hashcode } public void assignString(int index, byte[] bytes, int start, int length) { byteValues[index] = bytes; byteStarts[index] = start; byteLengths[index] = length; - isNull[longValues.length + doubleValues.length + index] = false; } - public void assignNullString(int index) { - // We do not assign the value to byteValues[] because the value is never used on null - isNull[longValues.length + doubleValues.length + index] = true; + public void assignNullString(int keyIndex, int index) { + isNull[keyIndex] = true; + // We need some value that indicates NULL. + byteLengths[index] = -1; } public void assignDecimal(int index, HiveDecimalWritable value) { decimalValues[index].set(value); - isNull[longValues.length + doubleValues.length + byteValues.length + index] = false; } - public void assignNullDecimal(int index) { - isNull[longValues.length + doubleValues.length + byteValues.length + index] = true; + public void assignNullDecimal(int keyIndex, int index) { + isNull[keyIndex] = true; + decimalValues[index].set(HiveDecimal.ZERO); // assign 0 to simplify hashcode } public void assignTimestamp(int index, Timestamp value) { timestampValues[index] = value; - isNull[longValues.length + doubleValues.length + byteValues.length + - decimalValues.length + index] = false; } public void assignTimestamp(int index, TimestampColumnVector colVector, int elementNum) { colVector.timestampUpdate(timestampValues[index], elementNum); - isNull[longValues.length + doubleValues.length + byteValues.length + - decimalValues.length + index] = false; } - public void assignNullTimestamp(int index) { - isNull[longValues.length + doubleValues.length + byteValues.length + - decimalValues.length + index] = true; + public void assignNullTimestamp(int keyIndex, int index) { + isNull[keyIndex] = true; + timestampValues[index] = ZERO_TIMESTAMP; // assign 0 to simplify hashcode } public void assignIntervalDayTime(int index, HiveIntervalDayTime value) { intervalDayTimeValues[index].set(value); - isNull[longValues.length + doubleValues.length + byteValues.length + - decimalValues.length + timestampValues.length + index] = false; } public void assignIntervalDayTime(int index, IntervalDayTimeColumnVector colVector, int elementNum) { intervalDayTimeValues[index].set(colVector.asScratchIntervalDayTime(elementNum)); - isNull[longValues.length + doubleValues.length + byteValues.length + - decimalValues.length + timestampValues.length + index] = false; } - public void assignNullIntervalDayTime(int index) { - isNull[longValues.length + doubleValues.length + byteValues.length + - decimalValues.length + timestampValues.length + index] = true; + public void assignNullIntervalDayTime(int keyIndex, int index) { + isNull[keyIndex] = true; + intervalDayTimeValues[index] = ZERO_INTERVALDAYTIME; // assign 0 to simplify hashcode } @Override public String toString() { - return String.format("%d[%s] %d[%s] %d[%s] %d[%s] %d[%s] %d[%s]", - longValues.length, Arrays.toString(longValues), - doubleValues.length, Arrays.toString(doubleValues), - byteValues.length, Arrays.toString(byteValues), - decimalValues.length, Arrays.toString(decimalValues), - timestampValues.length, Arrays.toString(timestampValues), - intervalDayTimeValues.length, Arrays.toString(intervalDayTimeValues)); - } - - public boolean getIsLongNull(int i) { - return isNull[i]; - } + StringBuilder sb = new StringBuilder(); + boolean isFirst = true; + if (longValues.length > 0) { + isFirst = false; + sb.append("longs "); + sb.append(Arrays.toString(longValues)); + } + if (doubleValues.length > 0) { + if (isFirst) { + isFirst = false; + } else { + sb.append(", "); + } + sb.append("doubles "); + sb.append(Arrays.toString(doubleValues)); + } + if (byteValues.length > 0) { + if (isFirst) { + isFirst = false; + } else { + sb.append(", "); + } + sb.append("byte lengths "); + sb.append(Arrays.toString(byteLengths)); + } + if (decimalValues.length > 0) { + if (isFirst) { + isFirst = false; + } else { + sb.append(", "); + } + sb.append("decimals "); + sb.append(Arrays.toString(decimalValues)); + } + if (timestampValues.length > 0) { + if (isFirst) { + isFirst = false; + } else { + sb.append(", "); + } + sb.append("timestamps "); + sb.append(Arrays.toString(timestampValues)); + } + if (intervalDayTimeValues.length > 0) { + if (isFirst) { + isFirst = false; + } else { + sb.append(", "); + } + sb.append("interval day times "); + sb.append(Arrays.toString(intervalDayTimeValues)); + } - public boolean getIsDoubleNull(int i) { - return isNull[longValues.length + i]; - } + if (isFirst) { + isFirst = false; + } else { + sb.append(", "); + } + sb.append("nulls "); + sb.append(Arrays.toString(isNull)); - public boolean getIsBytesNull(int i) { - return isNull[longValues.length + doubleValues.length + i]; + return sb.toString(); } - public long getLongValue(int i) { return longValues[i]; } @@ -390,30 +425,23 @@ public int getVariableSize() { return variableSize; } - public boolean getIsDecimalNull(int i) { - return isNull[longValues.length + doubleValues.length + byteValues.length + i]; - } - public HiveDecimalWritable getDecimal(int i) { return decimalValues[i]; } - public boolean getIsTimestampNull(int i) { - return isNull[longValues.length + doubleValues.length + byteValues.length + - decimalValues.length + i]; - } - public Timestamp getTimestamp(int i) { return timestampValues[i]; } - public boolean getIsIntervalDayTimeNull(int i) { - return isNull[longValues.length + doubleValues.length + byteValues.length + - decimalValues.length + timestampValues.length + i]; - } - public HiveIntervalDayTime getIntervalDayTime(int i) { return intervalDayTimeValues[i]; } -} + public void clearIsNull() { + Arrays.fill(isNull, false); + } + + public boolean isNull(int keyIndex) { + return isNull[keyIndex]; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java index b4708b5..c23d437 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java @@ -23,6 +23,9 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.util.JavaDataModel; import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; /** * Class for handling vectorized hash map key wrappers. It evaluates the key columns in a @@ -85,12 +88,168 @@ public int getKeysFixedSize() { * @throws HiveException */ public void evaluateBatch(VectorizedRowBatch batch) throws HiveException { - for(int i = 0; i < keyExpressions.length; ++i) { - keyExpressions[i].evaluate(batch); + + for(int i=0;i= 0) { - return kw.getIsLongNull(klh.longIndex) ? null : - keyOutputWriter.writeValue(kw.getLongValue(klh.longIndex)); - } else if (klh.doubleIndex >= 0) { - return kw.getIsDoubleNull(klh.doubleIndex) ? null : - keyOutputWriter.writeValue(kw.getDoubleValue(klh.doubleIndex)); - } else if (klh.stringIndex >= 0) { - return kw.getIsBytesNull(klh.stringIndex) ? null : - keyOutputWriter.writeValue( - kw.getBytes(klh.stringIndex), - kw.getByteStart(klh.stringIndex), - kw.getByteLength(klh.stringIndex)); - } else if (klh.decimalIndex >= 0) { - return kw.getIsDecimalNull(klh.decimalIndex)? null : - keyOutputWriter.writeValue( - kw.getDecimal(klh.decimalIndex)); - } else if (klh.timestampIndex >= 0) { - return kw.getIsTimestampNull(klh.timestampIndex)? null : - keyOutputWriter.writeValue( - kw.getTimestamp(klh.timestampIndex)); - } else if (klh.intervalDayTimeIndex >= 0) { - return kw.getIsIntervalDayTimeNull(klh.intervalDayTimeIndex)? null : - keyOutputWriter.writeValue( - kw.getIntervalDayTime(klh.intervalDayTimeIndex)); - } else { - throw new HiveException(String.format( - "Internal inconsistent KeyLookupHelper at index [%d]:%d %d %d %d %d %d", - i, klh.longIndex, klh.doubleIndex, klh.stringIndex, klh.decimalIndex, - klh.timestampIndex, klh.intervalDayTimeIndex)); + if (kw.isNull(keyIndex)) { + return null; + } + + ColumnVector.Type columnVectorType = columnVectorTypes[keyIndex]; + int columnTypeSpecificIndex = columnTypeSpecificIndices[keyIndex]; + + switch (columnVectorType) { + case LONG: + return keyOutputWriter.writeValue( + kw.getLongValue(columnTypeSpecificIndex)); + case DOUBLE: + return keyOutputWriter.writeValue( + kw.getDoubleValue(columnTypeSpecificIndex)); + case BYTES: + return keyOutputWriter.writeValue( + kw.getBytes(columnTypeSpecificIndex), + kw.getByteStart(columnTypeSpecificIndex), + kw.getByteLength(columnTypeSpecificIndex)); + case DECIMAL: + return keyOutputWriter.writeValue( + kw.getDecimal(columnTypeSpecificIndex)); + case TIMESTAMP: + return keyOutputWriter.writeValue( + kw.getTimestamp(columnTypeSpecificIndex)); + case INTERVAL_DAY_TIME: + return keyOutputWriter.writeValue( + kw.getIntervalDayTime(columnTypeSpecificIndex)); + default: + throw new HiveException("Unexpected column vector type " + columnVectorType); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index e3d9d7f..6b92891 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -1622,10 +1622,6 @@ private boolean validateGroupByOperator(GroupByOperator op, boolean isReduce, bo GroupByDesc desc = op.getConf(); VectorGroupByDesc vectorDesc = desc.getVectorDesc(); - if (desc.isGroupingSetsPresent()) { - LOG.info("Grouping sets not supported in vector mode"); - return false; - } if (desc.pruneGroupingSetId()) { LOG.info("Pruning grouping set id not supported in vector mode"); return false; @@ -1940,6 +1936,10 @@ private boolean validateAggregationIsPrimitive(VectorAggregateExpression vectorA LOG.info("Cannot vectorize groupby aggregate expression: UDF " + udfName + " not supported"); return new Pair(false, false); } + if (aggDesc.getDistinct()) { + LOG.info("Cannot vectorize groupby aggregate expression: DISTINCT"); + return new Pair(false, false); + } if (aggDesc.getParameters() != null && !validateExprNodeDesc(aggDesc.getParameters())) { LOG.info("Cannot vectorize groupby aggregate expression: UDF parameters not supported"); return new Pair(false, false); diff --git ql/src/test/queries/clientpositive/groupby_grouping_id1.q ql/src/test/queries/clientpositive/groupby_grouping_id1.q index d43ea37..9948ce9 100644 --- ql/src/test/queries/clientpositive/groupby_grouping_id1.q +++ ql/src/test/queries/clientpositive/groupby_grouping_id1.q @@ -2,6 +2,8 @@ CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE; LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1; +-- SORT_QUERY_RESULTS + SELECT key, val, GROUPING__ID from T1 group by key, val with cube; SELECT key, val, GROUPING__ID from T1 group by cube(key, val); diff --git ql/src/test/queries/clientpositive/groupby_grouping_sets1.q ql/src/test/queries/clientpositive/groupby_grouping_sets1.q index e239a87..4fcfd3b 100644 --- ql/src/test/queries/clientpositive/groupby_grouping_sets1.q +++ ql/src/test/queries/clientpositive/groupby_grouping_sets1.q @@ -1,3 +1,6 @@ + +-- SORT_QUERY_RESULTS + CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE; LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1; diff --git ql/src/test/queries/clientpositive/groupby_grouping_sets2.q ql/src/test/queries/clientpositive/groupby_grouping_sets2.q index b470964..af5bbe6 100644 --- ql/src/test/queries/clientpositive/groupby_grouping_sets2.q +++ ql/src/test/queries/clientpositive/groupby_grouping_sets2.q @@ -1,6 +1,8 @@ set hive.mapred.mode=nonstrict; set hive.new.job.grouping.set.cardinality=2; +-- SORT_QUERY_RESULTS + CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE; LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1; diff --git ql/src/test/queries/clientpositive/groupby_grouping_sets3.q ql/src/test/queries/clientpositive/groupby_grouping_sets3.q index 3c1a5e7..ef4a7aa 100644 --- ql/src/test/queries/clientpositive/groupby_grouping_sets3.q +++ ql/src/test/queries/clientpositive/groupby_grouping_sets3.q @@ -1,3 +1,6 @@ + +-- SORT_QUERY_RESULTS + -- In this test, 2 files are loaded into table T1. The data contains rows with the same value of a and b, -- with different number of rows for a and b in each file. Since bucketizedHiveInputFormat is used, -- this tests that the aggregate function stores the partial aggregate state correctly even if an diff --git ql/src/test/queries/clientpositive/groupby_grouping_sets5.q ql/src/test/queries/clientpositive/groupby_grouping_sets5.q index c1c98b3..570d464 100644 --- ql/src/test/queries/clientpositive/groupby_grouping_sets5.q +++ ql/src/test/queries/clientpositive/groupby_grouping_sets5.q @@ -7,6 +7,8 @@ CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMIN LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1; +-- SORT_QUERY_RESULTS + -- This tests that cubes and rollups work fine where the source is a sub-query EXPLAIN SELECT a, b, count(*) FROM diff --git ql/src/test/queries/clientpositive/groupby_grouping_sets6.q ql/src/test/queries/clientpositive/groupby_grouping_sets6.q index 5cdb4a5..e537bce 100644 --- ql/src/test/queries/clientpositive/groupby_grouping_sets6.q +++ ql/src/test/queries/clientpositive/groupby_grouping_sets6.q @@ -3,6 +3,8 @@ CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMIN LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1; +-- SORT_QUERY_RESULTS + set hive.optimize.ppd = false; -- This filter is not pushed down diff --git ql/src/test/queries/clientpositive/vector_groupby_cube1.q ql/src/test/queries/clientpositive/vector_groupby_cube1.q new file mode 100644 index 0000000..fd2f0de --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_cube1.q @@ -0,0 +1,55 @@ +set hive.mapred.mode=nonstrict; +set hive.map.aggr=true; +set hive.groupby.skewindata=false; + +-- SORT_QUERY_RESULTS + +CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1; + +EXPLAIN +SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube; +EXPLAIN +SELECT key, val, count(1) FROM T1 GROUP BY CUBE(key, val); + +SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube; + +EXPLAIN +SELECT key, val, GROUPING__ID, count(1) FROM T1 GROUP BY key, val with cube; + +SELECT key, val, GROUPING__ID, count(1) FROM T1 GROUP BY key, val with cube; + +EXPLAIN +SELECT key, count(distinct val) FROM T1 GROUP BY key with cube; + +SELECT key, count(distinct val) FROM T1 GROUP BY key with cube; + +set hive.groupby.skewindata=true; + +EXPLAIN +SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube; + +SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube; + +EXPLAIN +SELECT key, count(distinct val) FROM T1 GROUP BY key with cube; + +SELECT key, count(distinct val) FROM T1 GROUP BY key with cube; + + +set hive.multigroupby.singlereducer=true; + +CREATE TABLE T2(key1 STRING, key2 STRING, val INT) STORED AS TEXTFILE; +CREATE TABLE T3(key1 STRING, key2 STRING, val INT) STORED AS TEXTFILE; + +EXPLAIN +FROM T1 +INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with cube +INSERT OVERWRITE TABLE T3 SELECT key, val, sum(1) group by key, val with cube; + + +FROM T1 +INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with cube +INSERT OVERWRITE TABLE T3 SELECT key, val, sum(1) group by key, val with cube; + diff --git ql/src/test/queries/clientpositive/vector_groupby_grouping_id1.q ql/src/test/queries/clientpositive/vector_groupby_grouping_id1.q new file mode 100644 index 0000000..2c9bd3d --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_grouping_id1.q @@ -0,0 +1,22 @@ +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; + +CREATE TABLE T1_text(key STRING, val STRING) STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1_text; + +CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text; + +-- SORT_QUERY_RESULTS + +SELECT key, val, GROUPING__ID from T1 group by key, val with cube; +SELECT key, val, GROUPING__ID from T1 group by cube(key, val); + +SELECT GROUPING__ID, key, val from T1 group by key, val with rollup; +SELECT GROUPING__ID, key, val from T1 group by rollup (key, val); + +SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN "0" WHEN GROUPING__ID == 1 THEN "1" WHEN GROUPING__ID == 2 THEN "2" WHEN GROUPING__ID == 3 THEN "3" ELSE "nothing" END from T1 group by key, val with cube; +SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN "0" WHEN GROUPING__ID == 1 THEN "1" WHEN GROUPING__ID == 2 THEN "2" WHEN GROUPING__ID == 3 THEN "3" ELSE "nothing" END from T1 group by cube(key, val); + diff --git ql/src/test/queries/clientpositive/vector_groupby_grouping_id2.q ql/src/test/queries/clientpositive/vector_groupby_grouping_id2.q new file mode 100644 index 0000000..ebebc2e --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_grouping_id2.q @@ -0,0 +1,64 @@ +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; + +CREATE TABLE T1_text(key INT, value INT) STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1_text; + +CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text; + +set hive.groupby.skewindata = true; + +-- SORT_QUERY_RESULTS + +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP; +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY ROLLUP (key, value); + +SELECT GROUPING__ID, count(*) +FROM +( +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +) t +GROUP BY GROUPING__ID; + +SELECT GROUPING__ID, count(*) +FROM +( +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY ROLLUP(key, value) +) t +GROUP BY GROUPING__ID; + + +SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY key,value WITH ROLLUP) t1 +JOIN +(SELECT GROUPING__ID FROM T1 GROUP BY key, value WITH ROLLUP) t2 +ON t1.GROUPING__ID = t2.GROUPING__ID; + +SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY ROLLUP(key,value)) t1 +JOIN +(SELECT GROUPING__ID FROM T1 GROUP BY ROLLUP(key, value)) t2 +ON t1.GROUPING__ID = t2.GROUPING__ID; + + + + + +set hive.groupby.skewindata = false; + +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP; + +SELECT GROUPING__ID, count(*) +FROM +( +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +) t +GROUP BY GROUPING__ID; + +SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY key,value WITH ROLLUP) t1 +JOIN +(SELECT GROUPING__ID FROM T1 GROUP BY key, value WITH ROLLUP) t2 +ON t1.GROUPING__ID = t2.GROUPING__ID; + + diff --git ql/src/test/queries/clientpositive/vector_groupby_grouping_id3.q ql/src/test/queries/clientpositive/vector_groupby_grouping_id3.q new file mode 100644 index 0000000..e41ed1a --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_grouping_id3.q @@ -0,0 +1,30 @@ +set hive.explain.user=false; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; + +CREATE TABLE T1_text(key INT, value INT) STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1_text; + +CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text; + +set hive.cbo.enable = false; + +-- SORT_QUERY_RESULTS + +SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1; + +set hive.cbo.enable = true; + +SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1; + diff --git ql/src/test/queries/clientpositive/vector_groupby_grouping_sets1.q ql/src/test/queries/clientpositive/vector_groupby_grouping_sets1.q new file mode 100644 index 0000000..f8561fa --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_grouping_sets1.q @@ -0,0 +1,29 @@ +set hive.explain.user=false; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; + +-- SORT_QUERY_RESULTS + +CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text; + +CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text; + +SELECT * FROM T1; + +SELECT a, b, count(*) from T1 group by a, b with cube; +SELECT a, b, count(*) from T1 group by cube(a, b); + +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()); + +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)); + +SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c); + +SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)); + +SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b); + diff --git ql/src/test/queries/clientpositive/vector_groupby_grouping_sets2.q ql/src/test/queries/clientpositive/vector_groupby_grouping_sets2.q new file mode 100644 index 0000000..4fbab4e --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_grouping_sets2.q @@ -0,0 +1,36 @@ +set hive.explain.user=false; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; +set hive.mapred.mode=nonstrict; +set hive.new.job.grouping.set.cardinality=2; + +-- SORT_QUERY_RESULTS + +CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text; + +CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text; + +-- Since 4 grouping sets would be generated for the query below, an additional MR job should be created +EXPLAIN +SELECT a, b, count(*) from T1 group by a, b with cube; + +EXPLAIN +SELECT a, b, count(*) from T1 group by cube(a, b); +SELECT a, b, count(*) from T1 group by a, b with cube; + +EXPLAIN +SELECT a, b, sum(c) from T1 group by a, b with cube; +SELECT a, b, sum(c) from T1 group by a, b with cube; + +CREATE TABLE T2(a STRING, b STRING, c int, d int); + +INSERT OVERWRITE TABLE T2 +SELECT a, b, c, c from T1; + +EXPLAIN +SELECT a, b, sum(c+d) from T2 group by a, b with cube; +SELECT a, b, sum(c+d) from T2 group by a, b with cube; diff --git ql/src/test/queries/clientpositive/vector_groupby_grouping_sets3.q ql/src/test/queries/clientpositive/vector_groupby_grouping_sets3.q new file mode 100644 index 0000000..d299279 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_grouping_sets3.q @@ -0,0 +1,42 @@ +set hive.explain.user=false; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; +set hive.mapred.mode=nonstrict; +set hive.new.job.grouping.set.cardinality=2; + +-- SORT_QUERY_RESULTS + +-- In this test, 2 files are loaded into table T1. The data contains rows with the same value of a and b, +-- with different number of rows for a and b in each file. Since bucketizedHiveInputFormat is used, +-- this tests that the aggregate function stores the partial aggregate state correctly even if an +-- additional MR job is created for processing the grouping sets. +CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/grouping_sets1.txt' INTO TABLE T1_text; +LOAD DATA LOCAL INPATH '../../data/files/grouping_sets2.txt' INTO TABLE T1_text; + +CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text; + +set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; +set hive.new.job.grouping.set.cardinality = 30; + +-- The query below will execute in a single MR job, since 4 rows are generated per input row +-- (cube of a,b will lead to (a,b), (a, null), (null, b) and (null, null) and +-- hive.new.job.grouping.set.cardinality is more than 4. +EXPLAIN +SELECT a, b, avg(c), count(*) from T1 group by a, b with cube; + +EXPLAIN +SELECT a, b, avg(c), count(*) from T1 group by cube(a, b); +SELECT a, b, avg(c), count(*) from T1 group by a, b with cube; + +set hive.new.job.grouping.set.cardinality=2; + +-- The query below will execute in 2 MR jobs, since hive.new.job.grouping.set.cardinality is set to 2. +-- The partial aggregation state should be maintained correctly across MR jobs. +EXPLAIN +SELECT a, b, avg(c), count(*) from T1 group by a, b with cube; +SELECT a, b, avg(c), count(*) from T1 group by a, b with cube; + diff --git ql/src/test/queries/clientpositive/vector_groupby_grouping_sets4.q ql/src/test/queries/clientpositive/vector_groupby_grouping_sets4.q new file mode 100644 index 0000000..ef0d832 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_grouping_sets4.q @@ -0,0 +1,57 @@ +set hive.explain.user=false; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; +set hive.mapred.mode=nonstrict; +set hive.merge.mapfiles = false; +set hive.merge.mapredfiles = false; + +-- SORT_QUERY_RESULTS + +-- Set merging to false above to make the explain more readable + +CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text; + +CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text; + +-- This tests that cubes and rollups work fine inside sub-queries. +EXPLAIN +SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 +on subq1.a = subq2.a; + +EXPLAIN +SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by cube(a, b) ) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by cube(a, b) ) subq2 +on subq1.a = subq2.a; + +SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 +on subq1.a = subq2.a; + +set hive.new.job.grouping.set.cardinality=2; + +-- Since 4 grouping sets would be generated for each sub-query, an additional MR job should be created +-- for each of them +EXPLAIN +SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 +on subq1.a = subq2.a; + +SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 +on subq1.a = subq2.a; + diff --git ql/src/test/queries/clientpositive/vector_groupby_grouping_sets5.q ql/src/test/queries/clientpositive/vector_groupby_grouping_sets5.q new file mode 100644 index 0000000..15be3f3 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_grouping_sets5.q @@ -0,0 +1,39 @@ +set hive.explain.user=false; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; +set hive.mapred.mode=nonstrict; +set hive.merge.mapfiles = false; +set hive.merge.mapredfiles = false; +-- Set merging to false above to make the explain more readable + +CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text; + +CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text; + +-- SORT_QUERY_RESULTS + +-- This tests that cubes and rollups work fine where the source is a sub-query +EXPLAIN +SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube; + +EXPLAIN +SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by cube(a, b); + +SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube; + +set hive.new.job.grouping.set.cardinality=2; + +-- Since 4 grouping sets would be generated for the cube, an additional MR job should be created +EXPLAIN +SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube; + +SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube; diff --git ql/src/test/queries/clientpositive/vector_groupby_grouping_sets6.q ql/src/test/queries/clientpositive/vector_groupby_grouping_sets6.q new file mode 100644 index 0000000..72c2078 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_grouping_sets6.q @@ -0,0 +1,38 @@ +set hive.explain.user=false; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; +set hive.mapred.mode=nonstrict; + +CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text; + +CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text; + +-- SORT_QUERY_RESULTS + +set hive.optimize.ppd = false; + +-- This filter is not pushed down +EXPLAIN +SELECT a, b FROM +(SELECT a, b from T1 group by a, b grouping sets ( (a,b),a )) res +WHERE res.a=5; + +SELECT a, b FROM +(SELECT a, b from T1 group by a, b grouping sets ( (a,b),a )) res +WHERE res.a=5; + +set hive.cbo.enable = true; + +-- This filter is pushed down through aggregate with grouping sets by Calcite +EXPLAIN +SELECT a, b FROM +(SELECT a, b from T1 group by a, b grouping sets ( (a,b),a )) res +WHERE res.a=5; + +SELECT a, b FROM +(SELECT a, b from T1 group by a, b grouping sets ( (a,b),a )) res +WHERE res.a=5; diff --git ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_grouping.q ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_grouping.q new file mode 100644 index 0000000..7b7c892 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_grouping.q @@ -0,0 +1,99 @@ +set hive.explain.user=false; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; + +CREATE TABLE T1_text(key INT, value INT) STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1_text; + +CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text; + +-- SORT_QUERY_RESULTS + +explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value); + +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value); + +explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value); + +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value); + +explain +select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1; + +select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1; + +explain +select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end; + +select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end; + +set hive.cbo.enable=false; + +explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value); + +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value); + +explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value); + +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value); + +explain +select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1; + +select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1; + +explain +select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end; + +select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end; diff --git ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_limit.q ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_limit.q new file mode 100644 index 0000000..00649f7 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_limit.q @@ -0,0 +1,42 @@ +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; + +CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text; + +CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text; + +-- SORT_QUERY_RESULTS + +EXPLAIN +SELECT a, b, count(*) from T1 group by a, b with cube LIMIT 10; + +SELECT a, b, count(*) from T1 group by a, b with cube LIMIT 10; + +EXPLAIN +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()) LIMIT 10; + +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()) LIMIT 10; + +EXPLAIN +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)) LIMIT 10; + +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)) LIMIT 10; + +EXPLAIN +SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) LIMIT 10; + +SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) LIMIT 10; + +EXPLAIN +SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)) LIMIT 10; + +SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)) LIMIT 10; + +EXPLAIN +SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b) LIMIT 10; + +SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b) LIMIT 10; diff --git ql/src/test/queries/clientpositive/vector_groupby_grouping_window.q ql/src/test/queries/clientpositive/vector_groupby_grouping_window.q new file mode 100644 index 0000000..7d75433 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_grouping_window.q @@ -0,0 +1,20 @@ +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; + +create table t(category int, live int, comments int) stored as orc; +insert into table t select key, 0, 2 from src tablesample(3 rows); + +explain +select category, max(live) live, max(comments) comments, rank() OVER (PARTITION BY category ORDER BY comments) rank1 +FROM t +GROUP BY category +GROUPING SETS ((), (category)) +HAVING max(comments) > 0; + +select category, max(live) live, max(comments) comments, rank() OVER (PARTITION BY category ORDER BY comments) rank1 +FROM t +GROUP BY category +GROUPING SETS ((), (category)) +HAVING max(comments) > 0; diff --git ql/src/test/results/clientpositive/groupby_grouping_id1.q.out ql/src/test/results/clientpositive/groupby_grouping_id1.q.out index 9ef7615..e390535 100644 --- ql/src/test/results/clientpositive/groupby_grouping_id1.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_id1.q.out @@ -22,24 +22,24 @@ POSTHOOK: query: SELECT key, val, GROUPING__ID from T1 group by key, val with cu POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 0 -NULL 11 2 -NULL 12 2 -NULL 13 2 -NULL 17 2 -NULL 18 2 -NULL 28 2 -1 NULL 1 1 11 3 -2 NULL 1 +1 NULL 1 2 12 3 -3 NULL 1 +2 NULL 1 3 13 3 -7 NULL 1 +3 NULL 1 7 17 3 -8 NULL 1 +7 NULL 1 8 18 3 8 28 3 +8 NULL 1 +NULL 11 2 +NULL 12 2 +NULL 13 2 +NULL 17 2 +NULL 18 2 +NULL 28 2 +NULL NULL 0 PREHOOK: query: SELECT key, val, GROUPING__ID from T1 group by cube(key, val) PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -48,24 +48,24 @@ POSTHOOK: query: SELECT key, val, GROUPING__ID from T1 group by cube(key, val) POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 0 -NULL 11 2 -NULL 12 2 -NULL 13 2 -NULL 17 2 -NULL 18 2 -NULL 28 2 -1 NULL 1 1 11 3 -2 NULL 1 +1 NULL 1 2 12 3 -3 NULL 1 +2 NULL 1 3 13 3 -7 NULL 1 +3 NULL 1 7 17 3 -8 NULL 1 +7 NULL 1 8 18 3 8 28 3 +8 NULL 1 +NULL 11 2 +NULL 12 2 +NULL 13 2 +NULL 17 2 +NULL 18 2 +NULL 28 2 +NULL NULL 0 PREHOOK: query: SELECT GROUPING__ID, key, val from T1 group by key, val with rollup PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -76,14 +76,14 @@ POSTHOOK: Input: default@t1 #### A masked pattern was here #### 0 NULL NULL 1 1 NULL -3 1 11 1 2 NULL -3 2 12 1 3 NULL -3 3 13 1 7 NULL -3 7 17 1 8 NULL +3 1 11 +3 2 12 +3 3 13 +3 7 17 3 8 18 3 8 28 PREHOOK: query: SELECT GROUPING__ID, key, val from T1 group by rollup (key, val) @@ -96,14 +96,14 @@ POSTHOOK: Input: default@t1 #### A masked pattern was here #### 0 NULL NULL 1 1 NULL -3 1 11 1 2 NULL -3 2 12 1 3 NULL -3 3 13 1 7 NULL -3 7 17 1 8 NULL +3 1 11 +3 2 12 +3 3 13 +3 7 17 3 8 18 3 8 28 PREHOOK: query: SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN "0" WHEN GROUPING__ID == 1 THEN "1" WHEN GROUPING__ID == 2 THEN "2" WHEN GROUPING__ID == 3 THEN "3" ELSE "nothing" END from T1 group by key, val with cube @@ -114,24 +114,24 @@ POSTHOOK: query: SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 0 0 -NULL 11 2 2 -NULL 12 2 2 -NULL 13 2 2 -NULL 17 2 2 -NULL 18 2 2 -NULL 28 2 2 -1 NULL 1 1 1 11 3 3 -2 NULL 1 1 +1 NULL 1 1 2 12 3 3 -3 NULL 1 1 +2 NULL 1 1 3 13 3 3 -7 NULL 1 1 +3 NULL 1 1 7 17 3 3 -8 NULL 1 1 +7 NULL 1 1 8 18 3 3 8 28 3 3 +8 NULL 1 1 +NULL 11 2 2 +NULL 12 2 2 +NULL 13 2 2 +NULL 17 2 2 +NULL 18 2 2 +NULL 28 2 2 +NULL NULL 0 0 PREHOOK: query: SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN "0" WHEN GROUPING__ID == 1 THEN "1" WHEN GROUPING__ID == 2 THEN "2" WHEN GROUPING__ID == 3 THEN "3" ELSE "nothing" END from T1 group by cube(key, val) PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -140,21 +140,21 @@ POSTHOOK: query: SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 0 0 -NULL 11 2 2 -NULL 12 2 2 -NULL 13 2 2 -NULL 17 2 2 -NULL 18 2 2 -NULL 28 2 2 -1 NULL 1 1 1 11 3 3 -2 NULL 1 1 +1 NULL 1 1 2 12 3 3 -3 NULL 1 1 +2 NULL 1 1 3 13 3 3 -7 NULL 1 1 +3 NULL 1 1 7 17 3 3 -8 NULL 1 1 +7 NULL 1 1 8 18 3 3 8 28 3 3 +8 NULL 1 1 +NULL 11 2 2 +NULL 12 2 2 +NULL 13 2 2 +NULL 17 2 2 +NULL 18 2 2 +NULL 28 2 2 +NULL NULL 0 0 diff --git ql/src/test/results/clientpositive/groupby_grouping_sets1.q.out ql/src/test/results/clientpositive/groupby_grouping_sets1.q.out index aebba0d..c685313 100644 --- ql/src/test/results/clientpositive/groupby_grouping_sets1.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_sets1.q.out @@ -22,12 +22,12 @@ POSTHOOK: query: SELECT * FROM T1 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -8 1 1 -5 2 2 1 1 3 2 2 4 2 3 5 3 2 8 +5 2 2 +8 1 1 PREHOOK: query: SELECT a, b, count(*) from T1 group by a, b with cube PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -36,21 +36,21 @@ POSTHOOK: query: SELECT a, b, count(*) from T1 group by a, b with cube POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 6 -NULL 1 2 -NULL 2 3 -NULL 3 1 -1 NULL 1 1 1 1 -2 NULL 2 +1 NULL 1 2 2 1 2 3 1 -3 NULL 1 +2 NULL 2 3 2 1 -5 NULL 1 +3 NULL 1 5 2 1 -8 NULL 1 +5 NULL 1 8 1 1 +8 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 PREHOOK: query: SELECT a, b, count(*) from T1 group by cube(a, b) PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -59,21 +59,21 @@ POSTHOOK: query: SELECT a, b, count(*) from T1 group by cube(a, b) POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 6 -NULL 1 2 -NULL 2 3 -NULL 3 1 -1 NULL 1 1 1 1 -2 NULL 2 +1 NULL 1 2 2 1 2 3 1 -3 NULL 1 +2 NULL 2 3 2 1 -5 NULL 1 +3 NULL 1 5 2 1 -8 NULL 1 +5 NULL 1 8 1 1 +8 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 PREHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()) PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -82,21 +82,21 @@ POSTHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 6 -NULL 1 2 -NULL 2 3 -NULL 3 1 -1 NULL 1 1 1 1 -2 NULL 2 +1 NULL 1 2 2 1 2 3 1 -3 NULL 1 +2 NULL 2 3 2 1 -5 NULL 1 +3 NULL 1 5 2 1 -8 NULL 1 +5 NULL 1 8 1 1 +8 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 PREHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)) PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -105,17 +105,17 @@ POSTHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, ( POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -1 NULL 1 1 1 1 -2 NULL 2 +1 NULL 1 2 2 1 2 3 1 -3 NULL 1 +2 NULL 2 3 2 1 -5 NULL 1 +3 NULL 1 5 2 1 -8 NULL 1 +5 NULL 1 8 1 1 +8 NULL 1 PREHOOK: query: SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -124,6 +124,11 @@ POSTHOOK: query: SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### +1 +2 +3 +5 +8 NULL NULL NULL @@ -133,11 +138,6 @@ NULL NULL NULL NULL -1 -2 -3 -5 -8 PREHOOK: query: SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)) PREHOOK: type: QUERY PREHOOK: Input: default@t1 diff --git ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out index b4f8ce7..d733f8c 100644 --- ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out @@ -190,21 +190,21 @@ POSTHOOK: query: SELECT a, b, count(*) from T1 group by a, b with cube POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 6 -NULL 1 2 -NULL 2 3 -NULL 3 1 -1 NULL 1 1 1 1 -2 NULL 2 +1 NULL 1 2 2 1 2 3 1 -3 NULL 1 +2 NULL 2 3 2 1 -5 NULL 1 +3 NULL 1 5 2 1 -8 NULL 1 +5 NULL 1 8 1 1 +8 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 PREHOOK: query: EXPLAIN SELECT a, b, sum(c) from T1 group by a, b with cube PREHOOK: type: QUERY @@ -297,21 +297,21 @@ POSTHOOK: query: SELECT a, b, sum(c) from T1 group by a, b with cube POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 23.0 -NULL 1 4.0 -NULL 2 14.0 -NULL 3 5.0 -1 NULL 3.0 1 1 3.0 -2 NULL 9.0 +1 NULL 3.0 2 2 4.0 2 3 5.0 -3 NULL 8.0 +2 NULL 9.0 3 2 8.0 -5 NULL 2.0 +3 NULL 8.0 5 2 2.0 -8 NULL 1.0 +5 NULL 2.0 8 1 1.0 +8 NULL 1.0 +NULL 1 4.0 +NULL 2 14.0 +NULL 3 5.0 +NULL NULL 23.0 PREHOOK: query: CREATE TABLE T2(a STRING, b STRING, c int, d int) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default @@ -426,18 +426,18 @@ POSTHOOK: query: SELECT a, b, sum(c+d) from T2 group by a, b with cube POSTHOOK: type: QUERY POSTHOOK: Input: default@t2 #### A masked pattern was here #### -NULL NULL 46 -NULL 1 8 -NULL 2 28 -NULL 3 10 -1 NULL 6 1 1 6 -2 NULL 18 +1 NULL 6 2 2 8 2 3 10 -3 NULL 16 +2 NULL 18 3 2 16 -5 NULL 4 +3 NULL 16 5 2 4 -8 NULL 2 +5 NULL 4 8 1 2 +8 NULL 2 +NULL 1 8 +NULL 2 28 +NULL 3 10 +NULL NULL 46 diff --git ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out index 67cbdcd..20d5b10 100644 --- ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out @@ -148,22 +148,22 @@ POSTHOOK: query: SELECT a, b, avg(c), count(*) from T1 group by a, b with cube POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 3.8333333333333335 12 -NULL 1 2.0 5 -NULL 2 5.2 5 -NULL 3 5.0 2 -1 NULL 2.6666666666666665 3 1 1 3.0 2 1 2 2.0 1 -2 NULL 5.2 5 +1 NULL 2.6666666666666665 3 2 2 5.333333333333333 3 2 3 5.0 2 -3 NULL 8.0 1 +2 NULL 5.2 5 3 2 8.0 1 -5 NULL 2.0 1 +3 NULL 8.0 1 5 1 2.0 1 -8 NULL 1.0 2 +5 NULL 2.0 1 8 1 1.0 2 +8 NULL 1.0 2 +NULL 1 2.0 5 +NULL 2 5.2 5 +NULL 3 5.0 2 +NULL NULL 3.8333333333333335 12 PREHOOK: query: EXPLAIN SELECT a, b, avg(c), count(*) from T1 group by a, b with cube PREHOOK: type: QUERY @@ -256,19 +256,19 @@ POSTHOOK: query: SELECT a, b, avg(c), count(*) from T1 group by a, b with cube POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 3.8333333333333335 12 -NULL 1 2.0 5 -NULL 2 5.2 5 -NULL 3 5.0 2 -1 NULL 2.6666666666666665 3 1 1 3.0 2 1 2 2.0 1 -2 NULL 5.2 5 +1 NULL 2.6666666666666665 3 2 2 5.333333333333333 3 2 3 5.0 2 -3 NULL 8.0 1 +2 NULL 5.2 5 3 2 8.0 1 -5 NULL 2.0 1 +3 NULL 8.0 1 5 1 2.0 1 -8 NULL 1.0 2 +5 NULL 2.0 1 8 1 1.0 2 +8 NULL 1.0 2 +NULL 1 2.0 5 +NULL 2 5.2 5 +NULL 3 5.0 2 +NULL NULL 3.8333333333333335 12 diff --git ql/src/test/results/clientpositive/groupby_grouping_sets5.q.out ql/src/test/results/clientpositive/groupby_grouping_sets5.q.out index 166f110..46f696d 100644 --- ql/src/test/results/clientpositive/groupby_grouping_sets5.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_sets5.q.out @@ -202,21 +202,21 @@ POSTHOOK: query: SELECT a, b, count(*) FROM POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 6 -NULL 1 2 -NULL 2 3 -NULL 3 1 -1 NULL 1 1 1 1 -2 NULL 2 +1 NULL 1 2 2 1 2 3 1 -3 NULL 1 +2 NULL 2 3 2 1 -5 NULL 1 +3 NULL 1 5 2 1 -8 NULL 1 +5 NULL 1 8 1 1 +8 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 PREHOOK: query: EXPLAIN SELECT a, b, count(*) FROM (SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube @@ -341,18 +341,18 @@ POSTHOOK: query: SELECT a, b, count(*) FROM POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 6 -NULL 1 2 -NULL 2 3 -NULL 3 1 -1 NULL 1 1 1 1 -2 NULL 2 +1 NULL 1 2 2 1 2 3 1 -3 NULL 1 +2 NULL 2 3 2 1 -5 NULL 1 +3 NULL 1 5 2 1 -8 NULL 1 +5 NULL 1 8 1 1 +8 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 diff --git ql/src/test/results/clientpositive/groupby_grouping_sets6.q.out ql/src/test/results/clientpositive/groupby_grouping_sets6.q.out index 16f0871..b174e89 100644 --- ql/src/test/results/clientpositive/groupby_grouping_sets6.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_sets6.q.out @@ -81,8 +81,8 @@ WHERE res.a=5 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -5 NULL 5 2 +5 NULL PREHOOK: query: EXPLAIN SELECT a, b FROM (SELECT a, b from T1 group by a, b grouping sets ( (a,b),a )) res @@ -150,5 +150,5 @@ WHERE res.a=5 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -5 NULL 5 2 +5 NULL diff --git ql/src/test/results/clientpositive/llap/vector_count.q.out ql/src/test/results/clientpositive/llap/vector_count.q.out index 9ef5c2b..7c71355 100644 --- ql/src/test/results/clientpositive/llap/vector_count.q.out +++ ql/src/test/results/clientpositive/llap/vector_count.q.out @@ -80,7 +80,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: bigint) - Execution mode: vectorized, llap + Execution mode: llap LLAP IO: all inputs Reducer 2 Execution mode: llap @@ -153,7 +153,7 @@ STAGE PLANS: sort order: ++++ Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE value expressions: _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: bigint), _col9 (type: bigint) - Execution mode: vectorized, llap + Execution mode: llap LLAP IO: all inputs Reducer 2 Execution mode: llap diff --git ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out new file mode 100644 index 0000000..f8a6231 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out @@ -0,0 +1,773 @@ +PREHOOK: query: CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1 +PREHOOK: query: EXPLAIN +SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT key, val, count(1) FROM T1 GROUP BY CUBE(key, val) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT key, val, count(1) FROM T1 GROUP BY CUBE(key, val) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +1 11 1 +1 NULL 1 +2 12 1 +2 NULL 1 +3 13 1 +3 NULL 1 +7 17 1 +7 NULL 1 +8 18 1 +8 28 1 +8 NULL 2 +NULL 11 1 +NULL 12 1 +NULL 13 1 +NULL 17 1 +NULL 18 1 +NULL 28 1 +NULL NULL 6 +PREHOOK: query: EXPLAIN +SELECT key, val, GROUPING__ID, count(1) FROM T1 GROUP BY key, val with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT key, val, GROUPING__ID, count(1) FROM T1 GROUP BY key, val with cube +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key, val, GROUPING__ID, count(1) FROM T1 GROUP BY key, val with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, val, GROUPING__ID, count(1) FROM T1 GROUP BY key, val with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +1 11 3 1 +1 NULL 1 1 +2 12 3 1 +2 NULL 1 1 +3 13 3 1 +3 NULL 1 1 +7 17 3 1 +7 NULL 1 1 +8 18 3 1 +8 28 3 1 +8 NULL 1 2 +NULL 11 2 1 +NULL 12 2 1 +NULL 13 2 1 +NULL 17 2 1 +NULL 18 2 1 +NULL 28 2 1 +NULL NULL 0 6 +PREHOOK: query: EXPLAIN +SELECT key, count(distinct val) FROM T1 GROUP BY key with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT key, count(distinct val) FROM T1 GROUP BY key with cube +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: key, val + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT val) + keys: key (type: string), 0 (type: int), val (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col2:0._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col2 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key, count(distinct val) FROM T1 GROUP BY key with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, count(distinct val) FROM T1 GROUP BY key with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +1 1 +2 1 +3 1 +7 1 +8 2 +NULL 6 +PREHOOK: query: EXPLAIN +SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +1 11 1 +1 NULL 1 +2 12 1 +2 NULL 1 +3 13 1 +3 NULL 1 +7 17 1 +7 NULL 1 +8 18 1 +8 28 1 +8 NULL 2 +NULL 11 1 +NULL 12 1 +NULL 13 1 +NULL 17 1 +NULL 18 1 +NULL 28 1 +NULL NULL 6 +PREHOOK: query: EXPLAIN +SELECT key, count(distinct val) FROM T1 GROUP BY key with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT key, count(distinct val) FROM T1 GROUP BY key with cube +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: key, val + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT val) + keys: key (type: string), 0 (type: int), val (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col2:0._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: final + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col2 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key, count(distinct val) FROM T1 GROUP BY key with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, count(distinct val) FROM T1 GROUP BY key with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +1 1 +2 1 +3 1 +7 1 +8 2 +NULL 6 +PREHOOK: query: CREATE TABLE T2(key1 STRING, key2 STRING, val INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T2 +POSTHOOK: query: CREATE TABLE T2(key1 STRING, key2 STRING, val INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T2 +PREHOOK: query: CREATE TABLE T3(key1 STRING, key2 STRING, val INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T3 +POSTHOOK: query: CREATE TABLE T3(key1 STRING, key2 STRING, val INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T3 +PREHOOK: query: EXPLAIN +FROM T1 +INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with cube +INSERT OVERWRITE TABLE T3 SELECT key, val, sum(1) group by key, val with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM T1 +INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with cube +INSERT OVERWRITE TABLE T3 SELECT key, val, sum(1) group by key, val with cube +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: key, val + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: key (type: string), val (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: key, val + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(1) + keys: key (type: string), val (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), UDFToInteger(_col3) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t2 + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), UDFToInteger(_col3) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t3 + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t2 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t3 + + Stage: Stage-5 + Stats-Aggr Operator + +PREHOOK: query: FROM T1 +INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with cube +INSERT OVERWRITE TABLE T3 SELECT key, val, sum(1) group by key, val with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Output: default@t2 +PREHOOK: Output: default@t3 +POSTHOOK: query: FROM T1 +INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with cube +INSERT OVERWRITE TABLE T3 SELECT key, val, sum(1) group by key, val with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Output: default@t2 +POSTHOOK: Output: default@t3 +POSTHOOK: Lineage: t2.key1 SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: t2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] +POSTHOOK: Lineage: t2.val EXPRESSION [(t1)t1.null, ] +POSTHOOK: Lineage: t3.key1 SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: t3.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] +POSTHOOK: Lineage: t3.val EXPRESSION [(t1)t1.null, ] diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out new file mode 100644 index 0000000..878c83f --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out @@ -0,0 +1,179 @@ +PREHOOK: query: CREATE TABLE T1_text(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1_text +POSTHOOK: query: CREATE TABLE T1_text(key STRING, val STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_text +PREHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@t1_text +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@t1_text +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +POSTHOOK: Lineage: t1.key SIMPLE [(t1_text)t1_text.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: t1.val SIMPLE [(t1_text)t1_text.FieldSchema(name:val, type:string, comment:null), ] +t1_text.key t1_text.val +PREHOOK: query: SELECT key, val, GROUPING__ID from T1 group by key, val with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, val, GROUPING__ID from T1 group by key, val with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key val grouping__id +1 11 3 +1 NULL 1 +2 12 3 +2 NULL 1 +3 13 3 +3 NULL 1 +7 17 3 +7 NULL 1 +8 18 3 +8 28 3 +8 NULL 1 +NULL 11 2 +NULL 12 2 +NULL 13 2 +NULL 17 2 +NULL 18 2 +NULL 28 2 +NULL NULL 0 +PREHOOK: query: SELECT key, val, GROUPING__ID from T1 group by cube(key, val) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, val, GROUPING__ID from T1 group by cube(key, val) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key val grouping__id +1 11 3 +1 NULL 1 +2 12 3 +2 NULL 1 +3 13 3 +3 NULL 1 +7 17 3 +7 NULL 1 +8 18 3 +8 28 3 +8 NULL 1 +NULL 11 2 +NULL 12 2 +NULL 13 2 +NULL 17 2 +NULL 18 2 +NULL 28 2 +NULL NULL 0 +PREHOOK: query: SELECT GROUPING__ID, key, val from T1 group by key, val with rollup +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT GROUPING__ID, key, val from T1 group by key, val with rollup +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +grouping__id key val +0 NULL NULL +1 1 NULL +1 2 NULL +1 3 NULL +1 7 NULL +1 8 NULL +3 1 11 +3 2 12 +3 3 13 +3 7 17 +3 8 18 +3 8 28 +PREHOOK: query: SELECT GROUPING__ID, key, val from T1 group by rollup (key, val) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT GROUPING__ID, key, val from T1 group by rollup (key, val) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +grouping__id key val +0 NULL NULL +1 1 NULL +1 2 NULL +1 3 NULL +1 7 NULL +1 8 NULL +3 1 11 +3 2 12 +3 3 13 +3 7 17 +3 8 18 +3 8 28 +PREHOOK: query: SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN "0" WHEN GROUPING__ID == 1 THEN "1" WHEN GROUPING__ID == 2 THEN "2" WHEN GROUPING__ID == 3 THEN "3" ELSE "nothing" END from T1 group by key, val with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN "0" WHEN GROUPING__ID == 1 THEN "1" WHEN GROUPING__ID == 2 THEN "2" WHEN GROUPING__ID == 3 THEN "3" ELSE "nothing" END from T1 group by key, val with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key val grouping__id _c3 +1 11 3 3 +1 NULL 1 1 +2 12 3 3 +2 NULL 1 1 +3 13 3 3 +3 NULL 1 1 +7 17 3 3 +7 NULL 1 1 +8 18 3 3 +8 28 3 3 +8 NULL 1 1 +NULL 11 2 2 +NULL 12 2 2 +NULL 13 2 2 +NULL 17 2 2 +NULL 18 2 2 +NULL 28 2 2 +NULL NULL 0 0 +PREHOOK: query: SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN "0" WHEN GROUPING__ID == 1 THEN "1" WHEN GROUPING__ID == 2 THEN "2" WHEN GROUPING__ID == 3 THEN "3" ELSE "nothing" END from T1 group by cube(key, val) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN "0" WHEN GROUPING__ID == 1 THEN "1" WHEN GROUPING__ID == 2 THEN "2" WHEN GROUPING__ID == 3 THEN "3" ELSE "nothing" END from T1 group by cube(key, val) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key val grouping__id _c3 +1 11 3 3 +1 NULL 1 1 +2 12 3 3 +2 NULL 1 1 +3 13 3 3 +3 NULL 1 1 +7 17 3 3 +7 NULL 1 1 +8 18 3 3 +8 28 3 3 +8 NULL 1 1 +NULL 11 2 2 +NULL 12 2 2 +NULL 13 2 2 +NULL 17 2 2 +NULL 18 2 2 +NULL 28 2 2 +NULL NULL 0 0 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id2.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id2.q.out new file mode 100644 index 0000000..41c6883 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id2.q.out @@ -0,0 +1,359 @@ +PREHOOK: query: CREATE TABLE T1_text(key INT, value INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1_text +POSTHOOK: query: CREATE TABLE T1_text(key INT, value INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_text +PREHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@t1_text +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@t1_text +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +POSTHOOK: Lineage: t1.key SIMPLE [(t1_text)t1_text.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: t1.value SIMPLE [(t1_text)t1_text.FieldSchema(name:value, type:int, comment:null), ] +t1_text.key t1_text.value +PREHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value grouping__id _c3 +1 1 3 1 +1 NULL 1 2 +1 NULL 3 1 +2 2 3 1 +2 NULL 1 1 +3 3 3 1 +3 NULL 1 2 +3 NULL 3 1 +4 5 3 1 +4 NULL 1 1 +NULL NULL 0 6 +PREHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY ROLLUP (key, value) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY ROLLUP (key, value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value grouping__id _c3 +1 1 3 1 +1 NULL 1 2 +1 NULL 3 1 +2 2 3 1 +2 NULL 1 1 +3 3 3 1 +3 NULL 1 2 +3 NULL 3 1 +4 5 3 1 +4 NULL 1 1 +NULL NULL 0 6 +PREHOOK: query: SELECT GROUPING__ID, count(*) +FROM +( +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +) t +GROUP BY GROUPING__ID +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT GROUPING__ID, count(*) +FROM +( +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +) t +GROUP BY GROUPING__ID +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +grouping__id _c1 +0 1 +1 4 +3 6 +PREHOOK: query: SELECT GROUPING__ID, count(*) +FROM +( +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY ROLLUP(key, value) +) t +GROUP BY GROUPING__ID +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT GROUPING__ID, count(*) +FROM +( +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY ROLLUP(key, value) +) t +GROUP BY GROUPING__ID +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +grouping__id _c1 +0 1 +1 4 +3 6 +PREHOOK: query: SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY key,value WITH ROLLUP) t1 +JOIN +(SELECT GROUPING__ID FROM T1 GROUP BY key, value WITH ROLLUP) t2 +ON t1.GROUPING__ID = t2.GROUPING__ID +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY key,value WITH ROLLUP) t1 +JOIN +(SELECT GROUPING__ID FROM T1 GROUP BY key, value WITH ROLLUP) t2 +ON t1.GROUPING__ID = t2.GROUPING__ID +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +t1.grouping__id t2.grouping__id +0 0 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +PREHOOK: query: SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY ROLLUP(key,value)) t1 +JOIN +(SELECT GROUPING__ID FROM T1 GROUP BY ROLLUP(key, value)) t2 +ON t1.GROUPING__ID = t2.GROUPING__ID +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY ROLLUP(key,value)) t1 +JOIN +(SELECT GROUPING__ID FROM T1 GROUP BY ROLLUP(key, value)) t2 +ON t1.GROUPING__ID = t2.GROUPING__ID +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +t1.grouping__id t2.grouping__id +0 0 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +PREHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value grouping__id _c3 +1 1 3 1 +1 NULL 1 2 +1 NULL 3 1 +2 2 3 1 +2 NULL 1 1 +3 3 3 1 +3 NULL 1 2 +3 NULL 3 1 +4 5 3 1 +4 NULL 1 1 +NULL NULL 0 6 +PREHOOK: query: SELECT GROUPING__ID, count(*) +FROM +( +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +) t +GROUP BY GROUPING__ID +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT GROUPING__ID, count(*) +FROM +( +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +) t +GROUP BY GROUPING__ID +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +grouping__id _c1 +0 1 +1 4 +3 6 +PREHOOK: query: SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY key,value WITH ROLLUP) t1 +JOIN +(SELECT GROUPING__ID FROM T1 GROUP BY key, value WITH ROLLUP) t2 +ON t1.GROUPING__ID = t2.GROUPING__ID +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY key,value WITH ROLLUP) t1 +JOIN +(SELECT GROUPING__ID FROM T1 GROUP BY key, value WITH ROLLUP) t2 +ON t1.GROUPING__ID = t2.GROUPING__ID +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +t1.grouping__id t2.grouping__id +0 0 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out new file mode 100644 index 0000000..349fd13 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out @@ -0,0 +1,67 @@ +PREHOOK: query: CREATE TABLE T1_text(key INT, value INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1_text +POSTHOOK: query: CREATE TABLE T1_text(key INT, value INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_text +PREHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@t1_text +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@t1_text +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +POSTHOOK: Lineage: t1.key SIMPLE [(t1_text)t1_text.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: t1.value SIMPLE [(t1_text)t1_text.FieldSchema(name:value, type:int, comment:null), ] +t1_text.key t1_text.value +PREHOOK: query: SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value grouping__id _c3 +1 NULL 1 2 +2 NULL 1 1 +3 NULL 1 2 +4 NULL 1 1 +PREHOOK: query: SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value grouping__id _c3 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets1.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets1.q.out new file mode 100644 index 0000000..871b0e3 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets1.q.out @@ -0,0 +1,188 @@ +PREHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1_text +POSTHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_text +PREHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@t1_text +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@t1_text +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +POSTHOOK: Lineage: t1.a SIMPLE [(t1_text)t1_text.FieldSchema(name:a, type:string, comment:null), ] +POSTHOOK: Lineage: t1.b SIMPLE [(t1_text)t1_text.FieldSchema(name:b, type:string, comment:null), ] +POSTHOOK: Lineage: t1.c SIMPLE [(t1_text)t1_text.FieldSchema(name:c, type:string, comment:null), ] +t1_text.a t1_text.b t1_text.c +PREHOOK: query: SELECT * FROM T1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM T1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +t1.a t1.b t1.c +1 1 3 +2 2 4 +2 3 5 +3 2 8 +5 2 2 +8 1 1 +PREHOOK: query: SELECT a, b, count(*) from T1 group by a, b with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, count(*) from T1 group by a, b with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 +1 1 1 +1 NULL 1 +2 2 1 +2 3 1 +2 NULL 2 +3 2 1 +3 NULL 1 +5 2 1 +5 NULL 1 +8 1 1 +8 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 +PREHOOK: query: SELECT a, b, count(*) from T1 group by cube(a, b) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, count(*) from T1 group by cube(a, b) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 +1 1 1 +1 NULL 1 +2 2 1 +2 3 1 +2 NULL 2 +3 2 1 +3 NULL 1 +5 2 1 +5 NULL 1 +8 1 1 +8 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 +PREHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 +1 1 1 +1 NULL 1 +2 2 1 +2 3 1 +2 NULL 2 +3 2 1 +3 NULL 1 +5 2 1 +5 NULL 1 +8 1 1 +8 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 +PREHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 +1 1 1 +1 NULL 1 +2 2 1 +2 3 1 +2 NULL 2 +3 2 1 +3 NULL 1 +5 2 1 +5 NULL 1 +8 1 1 +8 NULL 1 +PREHOOK: query: SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a +1 +2 +3 +5 +8 +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +PREHOOK: query: SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a +1 +2 +3 +5 +8 +PREHOOK: query: SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +_c0 _c1 +2.0 1 +4.0 1 +5.0 2 +7.0 1 +9.0 1 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out new file mode 100644 index 0000000..5d79348 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out @@ -0,0 +1,469 @@ +PREHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1_text +POSTHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_text +PREHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@t1_text +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@t1_text +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +POSTHOOK: Lineage: t1.a SIMPLE [(t1_text)t1_text.FieldSchema(name:a, type:string, comment:null), ] +POSTHOOK: Lineage: t1.b SIMPLE [(t1_text)t1_text.FieldSchema(name:b, type:string, comment:null), ] +POSTHOOK: Lineage: t1.c SIMPLE [(t1_text)t1_text.FieldSchema(name:c, type:string, comment:null), ] +t1_text.a t1_text.b t1_text.c +PREHOOK: query: EXPLAIN +SELECT a, b, count(*) from T1 group by a, b with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, count(*) from T1 group by a, b with cube +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string) + outputColumnNames: a, b + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: a (type: string), b (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT a, b, count(*) from T1 group by cube(a, b) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, count(*) from T1 group by cube(a, b) +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string) + outputColumnNames: a, b + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: a (type: string), b (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b, count(*) from T1 group by a, b with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, count(*) from T1 group by a, b with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 +1 1 1 +1 NULL 1 +2 2 1 +2 3 1 +2 NULL 2 +3 2 1 +3 NULL 1 +5 2 1 +5 NULL 1 +8 1 1 +8 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 +PREHOOK: query: EXPLAIN +SELECT a, b, sum(c) from T1 group by a, b with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, sum(c) from T1 group by a, b with cube +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string), c (type: string) + outputColumnNames: a, b, c + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(c) + keys: a (type: string), b (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: double) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: double) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b, sum(c) from T1 group by a, b with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, sum(c) from T1 group by a, b with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 +1 1 3.0 +1 NULL 3.0 +2 2 4.0 +2 3 5.0 +2 NULL 9.0 +3 2 8.0 +3 NULL 8.0 +5 2 2.0 +5 NULL 2.0 +8 1 1.0 +8 NULL 1.0 +NULL 1 4.0 +NULL 2 14.0 +NULL 3 5.0 +NULL NULL 23.0 +PREHOOK: query: CREATE TABLE T2(a STRING, b STRING, c int, d int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T2 +POSTHOOK: query: CREATE TABLE T2(a STRING, b STRING, c int, d int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T2 +PREHOOK: query: INSERT OVERWRITE TABLE T2 +SELECT a, b, c, c from T1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Output: default@t2 +POSTHOOK: query: INSERT OVERWRITE TABLE T2 +SELECT a, b, c, c from T1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Output: default@t2 +POSTHOOK: Lineage: t2.a SIMPLE [(t1)t1.FieldSchema(name:a, type:string, comment:null), ] +POSTHOOK: Lineage: t2.b SIMPLE [(t1)t1.FieldSchema(name:b, type:string, comment:null), ] +POSTHOOK: Lineage: t2.c EXPRESSION [(t1)t1.FieldSchema(name:c, type:string, comment:null), ] +POSTHOOK: Lineage: t2.d EXPRESSION [(t1)t1.FieldSchema(name:c, type:string, comment:null), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: EXPLAIN +SELECT a, b, sum(c+d) from T2 group by a, b with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, sum(c+d) from T2 group by a, b with cube +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t2 + Statistics: Num rows: 6 Data size: 42 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string), (c + d) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 42 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col2) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 42 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 6 Data size: 42 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24 Data size: 168 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 24 Data size: 168 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 12 Data size: 84 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 84 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 84 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b, sum(c+d) from T2 group by a, b with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, sum(c+d) from T2 group by a, b with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +a b _c2 +1 1 6 +1 NULL 6 +2 2 8 +2 3 10 +2 NULL 18 +3 2 16 +3 NULL 16 +5 2 4 +5 NULL 4 +8 1 2 +8 NULL 2 +NULL 1 8 +NULL 2 28 +NULL 3 10 +NULL NULL 46 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3.q.out new file mode 100644 index 0000000..b30aabd --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3.q.out @@ -0,0 +1,314 @@ +PREHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1_text +POSTHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets1.txt' INTO TABLE T1_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets1.txt' INTO TABLE T1_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets2.txt' INTO TABLE T1_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets2.txt' INTO TABLE T1_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_text +PREHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@t1_text +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@t1_text +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +POSTHOOK: Lineage: t1.a SIMPLE [(t1_text)t1_text.FieldSchema(name:a, type:string, comment:null), ] +POSTHOOK: Lineage: t1.b SIMPLE [(t1_text)t1_text.FieldSchema(name:b, type:string, comment:null), ] +POSTHOOK: Lineage: t1.c SIMPLE [(t1_text)t1_text.FieldSchema(name:c, type:string, comment:null), ] +t1_text.a t1_text.b t1_text.c +PREHOOK: query: EXPLAIN +SELECT a, b, avg(c), count(*) from T1 group by a, b with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, avg(c), count(*) from T1 group by a, b with cube +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string), c (type: string) + outputColumnNames: a, b, c + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(c), count() + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 48 Data size: 12240 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 48 Data size: 12240 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: struct), _col4 (type: bigint) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3, _col4 + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: double), _col4 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT a, b, avg(c), count(*) from T1 group by cube(a, b) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, avg(c), count(*) from T1 group by cube(a, b) +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string), c (type: string) + outputColumnNames: a, b, c + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(c), count() + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 48 Data size: 12240 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 48 Data size: 12240 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: struct), _col4 (type: bigint) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3, _col4 + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: double), _col4 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b, avg(c), count(*) from T1 group by a, b with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, avg(c), count(*) from T1 group by a, b with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 _c3 +1 1 3.0 2 +1 2 2.0 1 +1 NULL 2.6666666666666665 3 +2 2 5.333333333333333 3 +2 3 5.0 2 +2 NULL 5.2 5 +3 2 8.0 1 +3 NULL 8.0 1 +5 1 2.0 1 +5 NULL 2.0 1 +8 1 1.0 2 +8 NULL 1.0 2 +NULL 1 2.0 5 +NULL 2 5.2 5 +NULL 3 5.0 2 +NULL NULL 3.8333333333333335 12 +PREHOOK: query: EXPLAIN +SELECT a, b, avg(c), count(*) from T1 group by a, b with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, avg(c), count(*) from T1 group by a, b with cube +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string), c (type: string) + outputColumnNames: a, b, c + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(c), count() + keys: a (type: string), b (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: bigint) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 48 Data size: 12240 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 48 Data size: 12240 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: struct), _col4 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col3, _col4 + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: double), _col4 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b, avg(c), count(*) from T1 group by a, b with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, avg(c), count(*) from T1 group by a, b with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 _c3 +1 1 3.0 2 +1 2 2.0 1 +1 NULL 2.6666666666666665 3 +2 2 5.333333333333333 3 +2 3 5.0 2 +2 NULL 5.2 5 +3 2 8.0 1 +3 NULL 8.0 1 +5 1 2.0 1 +5 NULL 2.0 1 +8 1 1.0 2 +8 NULL 1.0 2 +NULL 1 2.0 5 +NULL 2 5.2 5 +NULL 3 5.0 2 +NULL NULL 3.8333333333333335 12 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets4.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets4.q.out new file mode 100644 index 0000000..8052314 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets4.q.out @@ -0,0 +1,554 @@ +PREHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1_text +POSTHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_text +PREHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@t1_text +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@t1_text +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +POSTHOOK: Lineage: t1.a SIMPLE [(t1_text)t1_text.FieldSchema(name:a, type:string, comment:null), ] +POSTHOOK: Lineage: t1.b SIMPLE [(t1_text)t1_text.FieldSchema(name:b, type:string, comment:null), ] +POSTHOOK: Lineage: t1.c SIMPLE [(t1_text)t1_text.FieldSchema(name:c, type:string, comment:null), ] +t1_text.a t1_text.b t1_text.c +PREHOOK: query: EXPLAIN +SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 +on subq1.a = subq2.a +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 +on subq1.a = subq2.a +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(a) < 3.0) (type: boolean) + Statistics: Num rows: 2 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 2040 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 8 Data size: 2040 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(a) < 3.0) (type: boolean) + Statistics: Num rows: 2 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 2040 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 8 Data size: 2040 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 1122 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 1122 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: bigint) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by cube(a, b) ) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by cube(a, b) ) subq2 +on subq1.a = subq2.a +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by cube(a, b) ) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by cube(a, b) ) subq2 +on subq1.a = subq2.a +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(a) < 3.0) (type: boolean) + Statistics: Num rows: 2 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 2040 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 8 Data size: 2040 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(a) < 3.0) (type: boolean) + Statistics: Num rows: 2 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 2040 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 8 Data size: 2040 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 1122 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 1122 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: bigint) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 +on subq1.a = subq2.a +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 +on subq1.a = subq2.a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +subq1.a subq1.b subq1._c2 subq2.a subq2.b subq2._c2 +1 1 1 1 1 1 +1 1 1 1 NULL 1 +1 NULL 1 1 1 1 +1 NULL 1 1 NULL 1 +2 2 1 2 2 1 +2 2 1 2 3 1 +2 2 1 2 NULL 2 +2 3 1 2 2 1 +2 3 1 2 3 1 +2 3 1 2 NULL 2 +2 NULL 2 2 2 1 +2 NULL 2 2 3 1 +2 NULL 2 2 NULL 2 +PREHOOK: query: EXPLAIN +SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 +on subq1.a = subq2.a +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 +on subq1.a = subq2.a +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(a) < 3.0) (type: boolean) + Statistics: Num rows: 2 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: a (type: string), b (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 2 Data size: 510 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 5 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(a) < 3.0) (type: boolean) + Statistics: Num rows: 2 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: a (type: string), b (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 2 Data size: 510 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 2040 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 8 Data size: 2040 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: bigint) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 1122 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 1122 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 2040 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 8 Data size: 2040 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: bigint) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 +on subq1.a = subq2.a +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 +on subq1.a = subq2.a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +subq1.a subq1.b subq1._c2 subq2.a subq2.b subq2._c2 +1 1 1 1 1 1 +1 1 1 1 NULL 1 +1 NULL 1 1 1 1 +1 NULL 1 1 NULL 1 +2 2 1 2 2 1 +2 2 1 2 3 1 +2 2 1 2 NULL 2 +2 3 1 2 2 1 +2 3 1 2 3 1 +2 3 1 2 NULL 2 +2 NULL 2 2 2 1 +2 NULL 2 2 3 1 +2 NULL 2 2 NULL 2 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out new file mode 100644 index 0000000..0312d81 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out @@ -0,0 +1,371 @@ +PREHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1_text +POSTHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_text +PREHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@t1_text +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@t1_text +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +POSTHOOK: Lineage: t1.a SIMPLE [(t1_text)t1_text.FieldSchema(name:a, type:string, comment:null), ] +POSTHOOK: Lineage: t1.b SIMPLE [(t1_text)t1_text.FieldSchema(name:b, type:string, comment:null), ] +POSTHOOK: Lineage: t1.c SIMPLE [(t1_text)t1_text.FieldSchema(name:c, type:string, comment:null), ] +t1_text.a t1_text.b t1_text.c +PREHOOK: query: EXPLAIN +SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string) + outputColumnNames: a, b + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: a (type: string), b (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string), _col1 (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by cube(a, b) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by cube(a, b) +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string) + outputColumnNames: a, b + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: a (type: string), b (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string), _col1 (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 +1 1 1 +1 NULL 1 +2 2 1 +2 3 1 +2 NULL 2 +3 2 1 +3 NULL 1 +5 2 1 +5 NULL 1 +8 1 1 +8 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 +PREHOOK: query: EXPLAIN +SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string) + outputColumnNames: a, b + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: a (type: string), b (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 +1 1 1 +1 NULL 1 +2 2 1 +2 3 1 +2 NULL 2 +3 2 1 +3 NULL 1 +5 2 1 +5 NULL 1 +8 1 1 +8 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets6.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets6.q.out new file mode 100644 index 0000000..206ba79 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets6.q.out @@ -0,0 +1,192 @@ +PREHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1_text +POSTHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_text +PREHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@t1_text +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@t1_text +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +POSTHOOK: Lineage: t1.a SIMPLE [(t1_text)t1_text.FieldSchema(name:a, type:string, comment:null), ] +POSTHOOK: Lineage: t1.b SIMPLE [(t1_text)t1_text.FieldSchema(name:b, type:string, comment:null), ] +POSTHOOK: Lineage: t1.c SIMPLE [(t1_text)t1_text.FieldSchema(name:c, type:string, comment:null), ] +t1_text.a t1_text.b t1_text.c +PREHOOK: query: EXPLAIN +SELECT a, b FROM +(SELECT a, b from T1 group by a, b grouping sets ( (a,b),a )) res +WHERE res.a=5 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b FROM +(SELECT a, b from T1 group by a, b grouping sets ( (a,b),a )) res +WHERE res.a=5 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(a) = 5.0) (type: boolean) + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b FROM +(SELECT a, b from T1 group by a, b grouping sets ( (a,b),a )) res +WHERE res.a=5 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b FROM +(SELECT a, b from T1 group by a, b grouping sets ( (a,b),a )) res +WHERE res.a=5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b +5 2 +5 NULL +PREHOOK: query: EXPLAIN +SELECT a, b FROM +(SELECT a, b from T1 group by a, b grouping sets ( (a,b),a )) res +WHERE res.a=5 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b FROM +(SELECT a, b from T1 group by a, b grouping sets ( (a,b),a )) res +WHERE res.a=5 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(a) = 5.0) (type: boolean) + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b FROM +(SELECT a, b from T1 group by a, b grouping sets ( (a,b),a )) res +WHERE res.a=5 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b FROM +(SELECT a, b from T1 group by a, b grouping sets ( (a,b),a )) res +WHERE res.a=5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b +5 2 +5 NULL diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out new file mode 100644 index 0000000..27aaece --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out @@ -0,0 +1,858 @@ +PREHOOK: query: CREATE TABLE T1_text(key INT, value INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1_text +POSTHOOK: query: CREATE TABLE T1_text(key INT, value INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_text +PREHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@t1_text +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@t1_text +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +POSTHOOK: Lineage: t1.key SIMPLE [(t1_text)t1_text.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: t1.value SIMPLE [(t1_text)t1_text.FieldSchema(name:value, type:int, comment:null), ] +t1_text.key t1_text.value +PREHOOK: query: explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value) +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: int), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 18 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 18 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 9 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: tinyint), grouping(_col2, 0) (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 9 Data size: 60 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 9 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value grouping__id _c3 _c4 +1 1 3 1 1 +1 NULL 1 0 1 +1 NULL 3 1 1 +2 2 3 1 1 +2 NULL 1 0 1 +3 3 3 1 1 +3 NULL 1 0 1 +3 NULL 3 1 1 +4 5 3 1 1 +4 NULL 1 0 1 +NULL NULL 0 0 0 +PREHOOK: query: explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value) +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: int), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: tinyint), grouping(_col2, 0) (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value grouping__id _c3 _c4 +1 1 3 1 1 +1 NULL 1 0 1 +1 NULL 3 1 1 +2 2 3 1 1 +2 NULL 1 0 1 +3 3 3 1 1 +3 NULL 1 0 1 +3 NULL 3 1 1 +4 5 3 1 1 +4 NULL 1 0 1 +NULL 1 2 1 0 +NULL 2 2 1 0 +NULL 3 2 1 0 +NULL 5 2 1 0 +NULL NULL 0 0 0 +NULL NULL 2 1 0 +PREHOOK: query: explain +select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: int), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToInteger(grouping(_col2, 1)) = 1) (type: boolean) + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value +1 1 +1 NULL +2 2 +3 3 +3 NULL +4 5 +NULL 1 +NULL 2 +NULL 3 +NULL 5 +NULL NULL +PREHOOK: query: explain +select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: int), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((UDFToInteger(grouping(_col2, 1)) = 1) or (UDFToInteger(grouping(_col2, 0)) = 1)) (type: boolean) + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), (grouping(_col2, 1) + grouping(_col2, 0)) (type: tinyint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: tinyint), CASE WHEN ((_col2 = 1)) THEN (_col0) ELSE (null) END (type: int) + sort order: -+ + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey0 (type: tinyint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value x +1 1 2 +1 NULL 1 +1 NULL 2 +2 2 2 +2 NULL 1 +3 3 2 +3 NULL 1 +3 NULL 2 +4 5 2 +4 NULL 1 +NULL 1 1 +NULL 2 1 +NULL 3 1 +NULL 5 1 +NULL NULL 1 +PREHOOK: query: explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value) +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: key, value + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: int), value (type: int), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 18 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 18 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 9 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: tinyint), grouping(_col2, 0) (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 9 Data size: 60 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 9 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value grouping__id _c3 _c4 +1 1 3 1 1 +1 NULL 1 0 1 +1 NULL 3 1 1 +2 2 3 1 1 +2 NULL 1 0 1 +3 3 3 1 1 +3 NULL 1 0 1 +3 NULL 3 1 1 +4 5 3 1 1 +4 NULL 1 0 1 +NULL NULL 0 0 0 +PREHOOK: query: explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value) +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: key, value + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: int), value (type: int), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: tinyint), grouping(_col2, 0) (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value grouping__id _c3 _c4 +1 1 3 1 1 +1 NULL 1 0 1 +1 NULL 3 1 1 +2 2 3 1 1 +2 NULL 1 0 1 +3 3 3 1 1 +3 NULL 1 0 1 +3 NULL 3 1 1 +4 5 3 1 1 +4 NULL 1 0 1 +NULL 1 2 1 0 +NULL 2 2 1 0 +NULL 3 2 1 0 +NULL 5 2 1 0 +NULL NULL 0 0 0 +NULL NULL 2 1 0 +PREHOOK: query: explain +select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: key, value + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: int), value (type: int), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (grouping(_col2, 1) = 1) (type: boolean) + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value +1 1 +1 NULL +2 2 +3 3 +3 NULL +4 5 +NULL 1 +NULL 2 +NULL 3 +NULL 5 +NULL NULL +PREHOOK: query: explain +select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: key, value + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: int), value (type: int), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((grouping(_col2, 1) = 1) or (grouping(_col2, 0) = 1)) (type: boolean) + Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), (grouping(_col2, 1) + grouping(_col2, 0)) (type: tinyint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: tinyint), CASE WHEN ((_col2 = 1)) THEN (_col0) END (type: int) + sort order: -+ + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey0 (type: tinyint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value x +1 1 2 +1 NULL 1 +1 NULL 2 +2 2 2 +2 NULL 1 +3 3 2 +3 NULL 1 +3 NULL 2 +4 5 2 +4 NULL 1 +NULL 1 1 +NULL 2 1 +NULL 3 1 +NULL 5 1 +NULL NULL 1 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out new file mode 100644 index 0000000..8bcc7f2 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out @@ -0,0 +1,568 @@ +PREHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1_text +POSTHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_text +PREHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@t1_text +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@t1_text +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +POSTHOOK: Lineage: t1.a SIMPLE [(t1_text)t1_text.FieldSchema(name:a, type:string, comment:null), ] +POSTHOOK: Lineage: t1.b SIMPLE [(t1_text)t1_text.FieldSchema(name:b, type:string, comment:null), ] +POSTHOOK: Lineage: t1.c SIMPLE [(t1_text)t1_text.FieldSchema(name:c, type:string, comment:null), ] +t1_text.a t1_text.b t1_text.c +PREHOOK: query: EXPLAIN +SELECT a, b, count(*) from T1 group by a, b with cube LIMIT 10 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, count(*) from T1 group by a, b with cube LIMIT 10 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string) + outputColumnNames: a, b + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col3 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2550 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 2550 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b, count(*) from T1 group by a, b with cube LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, count(*) from T1 group by a, b with cube LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 +1 1 1 +1 NULL 1 +2 2 1 +2 3 1 +2 NULL 2 +3 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 +PREHOOK: query: EXPLAIN +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()) LIMIT 10 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()) LIMIT 10 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string) + outputColumnNames: a, b + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col3 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2550 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 2550 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()) LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()) LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 +1 1 1 +1 NULL 1 +2 2 1 +2 3 1 +2 NULL 2 +3 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 +PREHOOK: query: EXPLAIN +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)) LIMIT 10 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)) LIMIT 10 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string) + outputColumnNames: a, b + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col3 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)) LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)) LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 +1 1 1 +1 NULL 1 +2 2 1 +2 3 1 +2 NULL 2 +3 2 1 +3 NULL 1 +5 2 1 +5 NULL 1 +8 NULL 1 +PREHOOK: query: EXPLAIN +SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) LIMIT 10 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) LIMIT 10 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string), c (type: string) + outputColumnNames: a, b, c + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: a (type: string), b (type: string), c (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 18 Data size: 4590 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int) + sort order: ++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int) + Statistics: Num rows: 18 Data size: 4590 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 9 Data size: 2295 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 9 Data size: 2295 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 9 Data size: 2295 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 9 Data size: 2295 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a +1 +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +PREHOOK: query: EXPLAIN +SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)) LIMIT 10 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)) LIMIT 10 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string) + outputColumnNames: a + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: a (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Limit + Number of rows: 10 + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)) LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)) LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a +1 +2 +3 +5 +8 +PREHOOK: query: EXPLAIN +SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b) LIMIT 10 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b) LIMIT 10 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (UDFToDouble(a) + UDFToDouble(b)) (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: double), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: double), _col1 (type: int) + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: double), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col2 + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: double), _col2 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b) LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b) LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +_c0 _c1 +2.0 1 +4.0 1 +5.0 2 +7.0 1 +9.0 1 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out new file mode 100644 index 0000000..d8fb578 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out @@ -0,0 +1,157 @@ +PREHOOK: query: create table t(category int, live int, comments int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t +POSTHOOK: query: create table t(category int, live int, comments int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t +PREHOOK: query: insert into table t select key, 0, 2 from src tablesample(3 rows) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@t +POSTHOOK: query: insert into table t select key, 0, 2 from src tablesample(3 rows) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.category EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: t.comments SIMPLE [] +POSTHOOK: Lineage: t.live SIMPLE [] +_col0 _col1 _col2 +PREHOOK: query: explain +select category, max(live) live, max(comments) comments, rank() OVER (PARTITION BY category ORDER BY comments) rank1 +FROM t +GROUP BY category +GROUPING SETS ((), (category)) +HAVING max(comments) > 0 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select category, max(live) live, max(comments) comments, rank() OVER (PARTITION BY category ORDER BY comments) rank1 +FROM t +GROUP BY category +GROUPING SETS ((), (category)) +HAVING max(comments) > 0 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: category (type: int), live (type: int), comments (type: int) + outputColumnNames: category, live, comments + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(live), max(comments) + keys: category (type: int), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0), max(VALUE._col1) + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Filter Operator + predicate: (_col3 > 0) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col3 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: int, _col2: int, _col3: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col3 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col3 + name: rank + window function: GenericUDAFRankEvaluator + window frame: PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), rank_window_0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select category, max(live) live, max(comments) comments, rank() OVER (PARTITION BY category ORDER BY comments) rank1 +FROM t +GROUP BY category +GROUPING SETS ((), (category)) +HAVING max(comments) > 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: select category, max(live) live, max(comments) comments, rank() OVER (PARTITION BY category ORDER BY comments) rank1 +FROM t +GROUP BY category +GROUPING SETS ((), (category)) +HAVING max(comments) > 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +category live comments rank1 +NULL 0 2 1 +86 0 2 1 +238 0 2 1 +311 0 2 1 diff --git ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out index 77a0695..7d44de2 100644 --- ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out +++ ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out @@ -164,7 +164,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 24 Data size: 51264 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 Execution mode: llap @@ -249,7 +249,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 24 Data size: 51264 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 Execution mode: vectorized, llap @@ -337,7 +337,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 24 Data size: 51264 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 Execution mode: vectorized, llap