diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index e966959..bb716fa 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -603,6 +603,19 @@ minillaplocal.query.files=acid_globallimit.q,\ vector_auto_smb_mapjoin_14.q,\ vector_decimal_2.q,\ vector_decimal_udf.q,\ + vector_groupby_cube1.q,\ + vector_groupby_grouping_id1.q,\ + vector_groupby_grouping_id2.q,\ + vector_groupby_grouping_id3.q,\ + vector_groupby_grouping_sets1.q,\ + vector_groupby_grouping_sets2.q,\ + vector_groupby_grouping_sets3.q,\ + vector_groupby_grouping_sets4.q,\ + vector_groupby_grouping_sets5.q,\ + vector_groupby_grouping_sets6.q,\ + vector_groupby_grouping_sets_grouping.q,\ + vector_groupby_grouping_sets_limit.q,\ + vector_groupby_grouping_window.q,\ vector_join30.q,\ vector_join_filters.q,\ vector_leftsemi_mapjoin.q,\ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSetInfo.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSetInfo.java index 935b47b..7ac4f07 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSetInfo.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSetInfo.java @@ -20,10 +20,8 @@ import java.util.Arrays; -import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; /** * Class to keep information on a set of typed vector columns. Used by @@ -64,147 +62,87 @@ */ protected int[] intervalDayTimeIndices; - /** - * Helper class for looking up a key value based on key index. - */ - public class KeyLookupHelper { - public int longIndex; - public int doubleIndex; - public int stringIndex; - public int decimalIndex; - public int timestampIndex; - public int intervalDayTimeIndex; - - private static final int INDEX_UNUSED = -1; - - private void resetIndices() { - this.longIndex = this.doubleIndex = this.stringIndex = this.decimalIndex = - timestampIndex = intervalDayTimeIndex = INDEX_UNUSED; - } - public void setLong(int index) { - resetIndices(); - this.longIndex= index; - } - - public void setDouble(int index) { - resetIndices(); - this.doubleIndex = index; - } - - public void setString(int index) { - resetIndices(); - this.stringIndex = index; - } - - public void setDecimal(int index) { - resetIndices(); - this.decimalIndex = index; - } - - public void setTimestamp(int index) { - resetIndices(); - this.timestampIndex= index; - } - - public void setIntervalDayTime(int index) { - resetIndices(); - this.intervalDayTimeIndex= index; - } - } - - /** - * Lookup vector to map from key index to primitive type index. 
- */ - protected KeyLookupHelper[] indexLookup; + final protected int keyCount; + private int addKeyIndex; - private int keyCount; - private int addIndex; + private int addLongIndex; + private int addDoubleIndex; + private int addStringIndex; + private int addDecimalIndex; + private int addTimestampIndex; + private int addIntervalDayTimeIndex; - protected int longIndicesIndex; - protected int doubleIndicesIndex; - protected int stringIndicesIndex; - protected int decimalIndicesIndex; - protected int timestampIndicesIndex; - protected int intervalDayTimeIndicesIndex; + // Given the keyIndex these arrays return: + // The ColumnVector.Type, + // The type specific index into longIndices, doubleIndices, etc... + protected ColumnVector.Type[] columnVectorTypes; + protected int[] columnTypeSpecificIndices; protected VectorColumnSetInfo(int keyCount) { this.keyCount = keyCount; - this.addIndex = 0; + this.addKeyIndex = 0; // We'll over allocate and then shrink the array for each type longIndices = new int[this.keyCount]; - longIndicesIndex = 0; + addLongIndex = 0; doubleIndices = new int[this.keyCount]; - doubleIndicesIndex = 0; + addDoubleIndex = 0; stringIndices = new int[this.keyCount]; - stringIndicesIndex = 0; + addStringIndex = 0; decimalIndices = new int[this.keyCount]; - decimalIndicesIndex = 0; + addDecimalIndex = 0; timestampIndices = new int[this.keyCount]; - timestampIndicesIndex = 0; + addTimestampIndex = 0; intervalDayTimeIndices = new int[this.keyCount]; - intervalDayTimeIndicesIndex = 0; - indexLookup = new KeyLookupHelper[this.keyCount]; - } + addIntervalDayTimeIndex = 0; - protected void addKey(String outputType) throws HiveException { - indexLookup[addIndex] = new KeyLookupHelper(); + columnVectorTypes = new ColumnVector.Type[this.keyCount]; + columnTypeSpecificIndices = new int[this.keyCount]; + } - String typeName = VectorizationContext.mapTypeNameSynonyms(outputType); - TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName); - Type columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo); + protected void addKey(ColumnVector.Type columnVectorType) throws HiveException { switch (columnVectorType) { case LONG: - longIndices[longIndicesIndex] = addIndex; - indexLookup[addIndex].setLong(longIndicesIndex); - ++longIndicesIndex; + longIndices[addLongIndex] = addKeyIndex; + columnTypeSpecificIndices[addKeyIndex] = addLongIndex++; break; - case DOUBLE: - doubleIndices[doubleIndicesIndex] = addIndex; - indexLookup[addIndex].setDouble(doubleIndicesIndex); - ++doubleIndicesIndex; + doubleIndices[addDoubleIndex] = addKeyIndex; + columnTypeSpecificIndices[addKeyIndex] = addDoubleIndex++; break; - case BYTES: - stringIndices[stringIndicesIndex]= addIndex; - indexLookup[addIndex].setString(stringIndicesIndex); - ++stringIndicesIndex; + stringIndices[addStringIndex]= addKeyIndex; + columnTypeSpecificIndices[addKeyIndex] = addStringIndex++; break; - case DECIMAL: - decimalIndices[decimalIndicesIndex]= addIndex; - indexLookup[addIndex].setDecimal(decimalIndicesIndex); - ++decimalIndicesIndex; - break; - + decimalIndices[addDecimalIndex]= addKeyIndex; + columnTypeSpecificIndices[addKeyIndex] = addDecimalIndex++; + break; case TIMESTAMP: - timestampIndices[timestampIndicesIndex] = addIndex; - indexLookup[addIndex].setTimestamp(timestampIndicesIndex); - ++timestampIndicesIndex; + timestampIndices[addTimestampIndex] = addKeyIndex; + columnTypeSpecificIndices[addKeyIndex] = addTimestampIndex++; break; - case INTERVAL_DAY_TIME: - 
intervalDayTimeIndices[intervalDayTimeIndicesIndex] = addIndex; - indexLookup[addIndex].setIntervalDayTime(intervalDayTimeIndicesIndex); - ++intervalDayTimeIndicesIndex; + intervalDayTimeIndices[addIntervalDayTimeIndex] = addKeyIndex; + columnTypeSpecificIndices[addKeyIndex] = addIntervalDayTimeIndex++; break; - default: throw new HiveException("Unexpected column vector type " + columnVectorType); } - addIndex++; + columnVectorTypes[addKeyIndex] = columnVectorType; + addKeyIndex++; } - protected void finishAdding() { - longIndices = Arrays.copyOf(longIndices, longIndicesIndex); - doubleIndices = Arrays.copyOf(doubleIndices, doubleIndicesIndex); - stringIndices = Arrays.copyOf(stringIndices, stringIndicesIndex); - decimalIndices = Arrays.copyOf(decimalIndices, decimalIndicesIndex); - timestampIndices = Arrays.copyOf(timestampIndices, timestampIndicesIndex); - intervalDayTimeIndices = Arrays.copyOf(intervalDayTimeIndices, intervalDayTimeIndicesIndex); + + protected void finishAdding() throws HiveException { + longIndices = Arrays.copyOf(longIndices, addLongIndex); + doubleIndices = Arrays.copyOf(doubleIndices, addDoubleIndex); + stringIndices = Arrays.copyOf(stringIndices, addStringIndex); + decimalIndices = Arrays.copyOf(decimalIndices, addDecimalIndex); + timestampIndices = Arrays.copyOf(timestampIndices, addTimestampIndex); + intervalDayTimeIndices = Arrays.copyOf(intervalDayTimeIndices, addIntervalDayTimeIndex); } } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java index 2605203..0b3826f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java @@ -22,16 +22,21 @@ import java.lang.management.MemoryMXBean; import java.lang.ref.SoftReference; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; +import org.apache.commons.lang.ArrayUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.GroupByOperator; import org.apache.hadoop.hive.ql.exec.KeyWrapper; import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory; @@ -51,6 +56,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import javolution.util.FastBitSet; + import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; @@ -107,6 +114,24 @@ private transient VectorAssignRow vectorAssignRow; + /* + * Grouping sets members. + */ + private transient boolean groupingSetsPresent; + + // The field bits (i.e. which fields to include) or "id" for each grouping set. + private transient int[] groupingSets; + + // The position in the column keys of the dummy grouping set id column. + private transient int groupingSetPosition; + + // The planner puts a constant field in for the dummy grouping set id. 
We will overwrite it + // as we process the grouping sets. + private transient ConstantVectorExpression groupingSetsDummyVectorExpression; + + // We translate the grouping set bit field into a boolean arrays. + private transient boolean[][] allGroupingSetsOverrideIsNulls; + private transient int numEntriesHashTable; private transient long maxHashTblMemory; @@ -141,6 +166,32 @@ public void endGroup() throws HiveException { // Do nothing. } + protected abstract void doProcessBatch(VectorizedRowBatch batch, boolean isFirstGroupingSet, + boolean[] currentGroupingSetsOverrideIsNulls) throws HiveException; + + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { + + if (!groupingSetsPresent) { + doProcessBatch(batch, false, null); + return; + } + + // We drive the doProcessBatch logic with the same batch but different + // grouping set id and null variation. + // PERFORMANCE NOTE: We do not try to reuse columns and generate the KeyWrappers anew... + + final int size = groupingSets.length; + for (int i = 0; i < size; i++) { + + // NOTE: We are overwriting the constant vector value... + groupingSetsDummyVectorExpression.setLongValue(groupingSets[i]); + groupingSetsDummyVectorExpression.evaluate(batch); + + doProcessBatch(batch, (i == 0), allGroupingSetsOverrideIsNulls[i]); + } + } + /** * Evaluates the aggregators on the current batch. * The aggregationBatchInfo must have been prepared @@ -204,7 +255,8 @@ public void initialize(Configuration hconf) throws HiveException { } @Override - public void processBatch(VectorizedRowBatch batch) throws HiveException { + public void doProcessBatch(VectorizedRowBatch batch, boolean isFirstGroupingSet, + boolean[] currentGroupingSetsOverrideIsNulls) throws HiveException { for (int i = 0; i < aggregators.length; ++i) { aggregators[i].aggregateInput(aggregationBuffers.getAggregationBuffer(i), batch); } @@ -325,11 +377,24 @@ public void initialize(Configuration hconf) throws HiveException { } @Override - public void processBatch(VectorizedRowBatch batch) throws HiveException { + public void doProcessBatch(VectorizedRowBatch batch, boolean isFirstGroupingSet, + boolean[] currentGroupingSetsOverrideIsNulls) throws HiveException { + + if (!groupingSetsPresent || isFirstGroupingSet) { + + // Evaluate the key expressions once. + for(int i = 0; i < keyExpressions.length; ++i) { + keyExpressions[i].evaluate(batch); + } + } // First we traverse the batch to evaluate and prepare the KeyWrappers // After this the KeyWrappers are properly set and hash code is computed - keyWrappersBatch.evaluateBatch(batch); + if (!groupingSetsPresent) { + keyWrappersBatch.evaluateBatch(batch); + } else { + keyWrappersBatch.evaluateBatchGroupingSets(batch, currentGroupingSetsOverrideIsNulls); + } // Next we locate the aggregation buffer set for each key prepareBatchAggregationBufferSets(batch); @@ -604,10 +669,24 @@ public void free(VectorAggregationBufferRow t) { } @Override - public void processBatch(VectorizedRowBatch batch) throws HiveException { + public void doProcessBatch(VectorizedRowBatch batch, boolean isFirstGroupingSet, + boolean[] currentGroupingSetsOverrideIsNulls) throws HiveException { + + if (!groupingSetsPresent || isFirstGroupingSet) { + + // Evaluate the key expressions once. 
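+        // (On later grouping-set passes these evaluated key vectors are reused; only the dummy grouping-set id column and the per-key null overrides change per pass.)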
+ for(int i = 0; i < keyExpressions.length; ++i) { + keyExpressions[i].evaluate(batch); + } + } + // First we traverse the batch to evaluate and prepare the KeyWrappers // After this the KeyWrappers are properly set and hash code is computed - keyWrappersBatch.evaluateBatch(batch); + if (!groupingSetsPresent) { + keyWrappersBatch.evaluateBatch(batch); + } else { + keyWrappersBatch.evaluateBatchGroupingSets(batch, currentGroupingSetsOverrideIsNulls); + } VectorHashKeyWrapper[] batchKeys = keyWrappersBatch.getVectorHashKeyWrappers(); @@ -699,7 +778,10 @@ public void close(boolean aborted) throws HiveException { @Override public void initialize(Configuration hconf) throws HiveException { inGroup = false; - groupKeyHelper = new VectorGroupKeyHelper(keyExpressions.length); + + // We do not include the dummy grouping set column in the output. So we pass outputKeyLength + // instead of keyExpressions.length + groupKeyHelper = new VectorGroupKeyHelper(outputKeyLength); groupKeyHelper.init(keyExpressions); groupAggregators = allocateAggregationBuffer(); buffer = new DataOutputBuffer(); @@ -722,7 +804,8 @@ public void endGroup() throws HiveException { } @Override - public void processBatch(VectorizedRowBatch batch) throws HiveException { + public void doProcessBatch(VectorizedRowBatch batch, boolean isFirstGroupingSet, + boolean[] currentGroupingSetsOverrideIsNulls) throws HiveException { assert(inGroup); if (first) { // Copy the group key to output batch now. We'll copy in the aggregates at the end of the group. @@ -781,6 +864,49 @@ public VectorGroupByOperator(CompilationOpContext ctx) { super(ctx); } + private void setupGroupingSets() { + + groupingSetsPresent = conf.isGroupingSetsPresent(); + if (!groupingSetsPresent) { + groupingSets = null; + groupingSetPosition = -1; + groupingSetsDummyVectorExpression = null; + allGroupingSetsOverrideIsNulls = null; + return; + } + + groupingSets = ArrayUtils.toPrimitive(conf.getListGroupingSets().toArray(new Integer[0])); + groupingSetPosition = conf.getGroupingSetPosition(); + + allGroupingSetsOverrideIsNulls = new boolean[groupingSets.length][]; + + int pos = 0; + for (int groupingSet: groupingSets) { + + // Create the mapping corresponding to the grouping set + + // Assume all columns are null, except the dummy column is always non-null. + boolean[] groupingSetsOverrideIsNull = new boolean[keyExpressions.length]; + Arrays.fill(groupingSetsOverrideIsNull, true); + groupingSetsOverrideIsNull[groupingSetPosition] = false; + + // Add keys of this grouping set. + FastBitSet bitset = GroupByOperator.groupingSet2BitSet(groupingSet); + for (int keyPos = bitset.nextSetBit(0); keyPos >= 0; + keyPos = bitset.nextSetBit(keyPos+1)) { + groupingSetsOverrideIsNull[keyPos] = false; + } + + allGroupingSetsOverrideIsNulls[pos] = groupingSetsOverrideIsNull; + pos++; + } + + // The last key column is the dummy grouping set id. + // + // Figure out which (scratch) column was used so we can overwrite the dummy id. 
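+    // (keyExpressions[groupingSetPosition] is the constant expression the planner created for the dummy id; holding a reference to it lets processBatch set a different long value before each grouping-set pass.)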
+ + groupingSetsDummyVectorExpression = (ConstantVectorExpression) keyExpressions[groupingSetPosition]; + } @Override protected void initializeOp(Configuration hconf) throws HiveException { @@ -834,15 +960,19 @@ protected void initializeOp(Configuration hconf) throws HiveException { forwardCache = new Object[outputKeyLength + aggregators.length]; + setupGroupingSets(); + switch (conf.getVectorDesc().getProcessingMode()) { case GLOBAL: Preconditions.checkState(outputKeyLength == 0); + Preconditions.checkState(!groupingSetsPresent); processingMode = this.new ProcessingModeGlobalAggregate(); break; case HASH: processingMode = this.new ProcessingModeHashAggregate(); break; case MERGE_PARTIAL: + Preconditions.checkState(!groupingSetsPresent); processingMode = this.new ProcessingModeReduceMergePartial(); break; case STREAMING: diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupKeyHelper.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupKeyHelper.java index 50d0452..0ff389e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupKeyHelper.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupKeyHelper.java @@ -19,8 +19,12 @@ package org.apache.hadoop.hive.ql.exec.vector; import java.io.IOException; + +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.io.DataOutputBuffer; /** @@ -33,9 +37,16 @@ public VectorGroupKeyHelper(int keyCount) { } void init(VectorExpression[] keyExpressions) throws HiveException { + + // NOTE: To support pruning the grouping set id dummy key by VectorGroupbyOpeator MERGE_PARTIAL + // case, we use the keyCount passed to the constructor and not keyExpressions.length. + // Inspect the output type of each key expression. - for(int i=0; i < keyExpressions.length; ++i) { - addKey(keyExpressions[i].getOutputType()); + for(int i=0; i < keyCount; ++i) { + String typeName = VectorizationContext.mapTypeNameSynonyms(keyExpressions[i].getOutputType()); + TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName); + Type columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo); + addKey(columnVectorType); } finishAdding(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java index 2bd1850..887feee 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java @@ -30,6 +30,8 @@ import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import com.google.common.base.Preconditions; + /** * A hash map key wrapper for vectorized processing. * It stores the key values as primitives in arrays for each supported primitive type. @@ -57,15 +59,22 @@ private HiveDecimalWritable[] decimalValues; private Timestamp[] timestampValues; + private static Timestamp ZERO_TIMESTAMP = new Timestamp(0); private HiveIntervalDayTime[] intervalDayTimeValues; + private static HiveIntervalDayTime ZERO_INTERVALDAYTIME= new HiveIntervalDayTime(0, 0); + // NOTE: The null array is indexed by keyIndex, which is not available internally. 
The mapping + // from a long, double, etc index to key index is kept once in the separate + // VectorColumnSetInfo object. private boolean[] isNull; + private int hashcode; public VectorHashKeyWrapper(int longValuesCount, int doubleValuesCount, int byteValuesCount, int decimalValuesCount, int timestampValuesCount, - int intervalDayTimeValuesCount) { + int intervalDayTimeValuesCount, + int keyCount) { longValues = longValuesCount > 0 ? new long[longValuesCount] : EMPTY_LONG_ARRAY; doubleValues = doubleValuesCount > 0 ? new double[doubleValuesCount] : EMPTY_DOUBLE_ARRAY; decimalValues = decimalValuesCount > 0 ? new HiveDecimalWritable[decimalValuesCount] : EMPTY_DECIMAL_ARRAY; @@ -89,8 +98,7 @@ public VectorHashKeyWrapper(int longValuesCount, int doubleValuesCount, for(int i = 0; i < intervalDayTimeValuesCount; ++i) { intervalDayTimeValues[i] = new HiveIntervalDayTime(); } - isNull = new boolean[longValuesCount + doubleValuesCount + byteValuesCount + - decimalValuesCount + timestampValuesCount + intervalDayTimeValuesCount]; + isNull = new boolean[keyCount]; hashcode = 0; } @@ -127,19 +135,14 @@ public void setHashKey() { * Hashing the string is potentially expensive so is better to branch. * Additionally not looking at values for nulls allows us not reset the values. */ - if (!isNull[longValues.length + doubleValues.length + i]) { + if (byteLengths[i] != -1) { byte[] bytes = byteValues[i]; int start = byteStarts[i]; int length = byteLengths[i]; - if (length == bytes.length && start == 0) { - hashcode ^= Arrays.hashCode(bytes); - } - else { - // Unfortunately there is no Arrays.hashCode(byte[], start, length) - for(int j = start; j < start + length; ++j) { - // use 461 as is a (sexy!) prime. - hashcode ^= 461 * bytes[j]; - } + // Unfortunately there is no Arrays.hashCode(byte[], start, length) + for(int j = start; j < start + length; ++j) { + // use 461 as is a (sexy!) prime. + hashcode ^= 461 * bytes[j]; } } } @@ -171,7 +174,7 @@ private boolean bytesEquals(VectorHashKeyWrapper keyThat) { //By the time we enter here the byteValues.lentgh and isNull must have already been compared for (int i = 0; i < byteValues.length; ++i) { // the byte comparison is potentially expensive so is better to branch on null - if (!isNull[longValues.length + doubleValues.length + i]) { + if (byteLengths[i] != -1) { if (!StringExpr.equal( byteValues[i], byteStarts[i], @@ -215,7 +218,7 @@ public void duplicateTo(VectorHashKeyWrapper clone) { for (int i = 0; i < byteValues.length; ++i) { // avoid allocation/copy of nulls, because it potentially expensive. // branch instead. 
- if (!isNull[longValues.length + doubleValues.length + i]) { + if (byteLengths[i] != -1) { clone.byteValues[i] = Arrays.copyOfRange(byteValues[i], byteStarts[i], byteStarts[i] + byteLengths[i]); } @@ -261,106 +264,141 @@ public void copyKey(KeyWrapper oldWrapper) { throw new UnsupportedOperationException(); } - public void assignDouble(int index, double d) { - doubleValues[index] = d; - isNull[longValues.length + index] = false; + public void assignLong(int index, long v) { + longValues[index] = v; } - public void assignNullDouble(int index) { - doubleValues[index] = 0; // assign 0 to simplify hashcode - isNull[longValues.length + index] = true; + public void assignNullLong(int keyIndex, int index) { + isNull[keyIndex] = true; + longValues[index] = 0; // assign 0 to simplify hashcode } - public void assignLong(int index, long v) { - longValues[index] = v; - isNull[index] = false; + public void assignDouble(int index, double d) { + doubleValues[index] = d; } - public void assignNullLong(int index) { - longValues[index] = 0; // assign 0 to simplify hashcode - isNull[index] = true; + public void assignNullDouble(int keyIndex, int index) { + isNull[keyIndex] = true; + doubleValues[index] = 0; // assign 0 to simplify hashcode } public void assignString(int index, byte[] bytes, int start, int length) { + Preconditions.checkState(bytes != null); byteValues[index] = bytes; byteStarts[index] = start; byteLengths[index] = length; - isNull[longValues.length + doubleValues.length + index] = false; } - public void assignNullString(int index) { - // We do not assign the value to byteValues[] because the value is never used on null - isNull[longValues.length + doubleValues.length + index] = true; + public void assignNullString(int keyIndex, int index) { + isNull[keyIndex] = true; + byteValues[index] = null; + byteStarts[index] = 0; + // We need some value that indicates NULL. 
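+    // (-1 can never be a real string length; setHashKey, bytesEquals and duplicateTo all test byteLengths[i] != -1 to skip null keys.)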
+ byteLengths[index] = -1; } public void assignDecimal(int index, HiveDecimalWritable value) { decimalValues[index].set(value); - isNull[longValues.length + doubleValues.length + byteValues.length + index] = false; } - public void assignNullDecimal(int index) { - isNull[longValues.length + doubleValues.length + byteValues.length + index] = true; + public void assignNullDecimal(int keyIndex, int index) { + isNull[keyIndex] = true; + decimalValues[index].set(HiveDecimal.ZERO); // assign 0 to simplify hashcode } public void assignTimestamp(int index, Timestamp value) { timestampValues[index] = value; - isNull[longValues.length + doubleValues.length + byteValues.length + - decimalValues.length + index] = false; } public void assignTimestamp(int index, TimestampColumnVector colVector, int elementNum) { colVector.timestampUpdate(timestampValues[index], elementNum); - isNull[longValues.length + doubleValues.length + byteValues.length + - decimalValues.length + index] = false; } - public void assignNullTimestamp(int index) { - isNull[longValues.length + doubleValues.length + byteValues.length + - decimalValues.length + index] = true; + public void assignNullTimestamp(int keyIndex, int index) { + isNull[keyIndex] = true; + timestampValues[index] = ZERO_TIMESTAMP; // assign 0 to simplify hashcode } public void assignIntervalDayTime(int index, HiveIntervalDayTime value) { intervalDayTimeValues[index].set(value); - isNull[longValues.length + doubleValues.length + byteValues.length + - decimalValues.length + timestampValues.length + index] = false; } public void assignIntervalDayTime(int index, IntervalDayTimeColumnVector colVector, int elementNum) { intervalDayTimeValues[index].set(colVector.asScratchIntervalDayTime(elementNum)); - isNull[longValues.length + doubleValues.length + byteValues.length + - decimalValues.length + timestampValues.length + index] = false; } - public void assignNullIntervalDayTime(int index) { - isNull[longValues.length + doubleValues.length + byteValues.length + - decimalValues.length + timestampValues.length + index] = true; + public void assignNullIntervalDayTime(int keyIndex, int index) { + isNull[keyIndex] = true; + intervalDayTimeValues[index] = ZERO_INTERVALDAYTIME; // assign 0 to simplify hashcode } @Override public String toString() { - return String.format("%d[%s] %d[%s] %d[%s] %d[%s] %d[%s] %d[%s]", - longValues.length, Arrays.toString(longValues), - doubleValues.length, Arrays.toString(doubleValues), - byteValues.length, Arrays.toString(byteValues), - decimalValues.length, Arrays.toString(decimalValues), - timestampValues.length, Arrays.toString(timestampValues), - intervalDayTimeValues.length, Arrays.toString(intervalDayTimeValues)); - } - - public boolean getIsLongNull(int i) { - return isNull[i]; - } + StringBuilder sb = new StringBuilder(); + boolean isFirst = true; + if (longValues.length > 0) { + isFirst = false; + sb.append("longs "); + sb.append(Arrays.toString(longValues)); + } + if (doubleValues.length > 0) { + if (isFirst) { + isFirst = false; + } else { + sb.append(", "); + } + sb.append("doubles "); + sb.append(Arrays.toString(doubleValues)); + } + if (byteValues.length > 0) { + if (isFirst) { + isFirst = false; + } else { + sb.append(", "); + } + sb.append("byte lengths "); + sb.append(Arrays.toString(byteLengths)); + } + if (decimalValues.length > 0) { + if (isFirst) { + isFirst = false; + } else { + sb.append(", "); + } + sb.append("decimals "); + sb.append(Arrays.toString(decimalValues)); + } + if (timestampValues.length > 0) { + if (isFirst) { + 
isFirst = false; + } else { + sb.append(", "); + } + sb.append("timestamps "); + sb.append(Arrays.toString(timestampValues)); + } + if (intervalDayTimeValues.length > 0) { + if (isFirst) { + isFirst = false; + } else { + sb.append(", "); + } + sb.append("interval day times "); + sb.append(Arrays.toString(intervalDayTimeValues)); + } - public boolean getIsDoubleNull(int i) { - return isNull[longValues.length + i]; - } + if (isFirst) { + isFirst = false; + } else { + sb.append(", "); + } + sb.append("nulls "); + sb.append(Arrays.toString(isNull)); - public boolean getIsBytesNull(int i) { - return isNull[longValues.length + doubleValues.length + i]; + return sb.toString(); } - public long getLongValue(int i) { return longValues[i]; } @@ -390,30 +428,23 @@ public int getVariableSize() { return variableSize; } - public boolean getIsDecimalNull(int i) { - return isNull[longValues.length + doubleValues.length + byteValues.length + i]; - } - public HiveDecimalWritable getDecimal(int i) { return decimalValues[i]; } - public boolean getIsTimestampNull(int i) { - return isNull[longValues.length + doubleValues.length + byteValues.length + - decimalValues.length + i]; - } - public Timestamp getTimestamp(int i) { return timestampValues[i]; } - public boolean getIsIntervalDayTimeNull(int i) { - return isNull[longValues.length + doubleValues.length + byteValues.length + - decimalValues.length + timestampValues.length + i]; - } - public HiveIntervalDayTime getIntervalDayTime(int i) { return intervalDayTimeValues[i]; } -} + public void clearIsNull() { + Arrays.fill(isNull, false); + } + + public boolean isNull(int keyIndex) { + return isNull[keyIndex]; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java index b4708b5..c23d437 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java @@ -23,6 +23,9 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.util.JavaDataModel; import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; /** * Class for handling vectorized hash map key wrappers. It evaluates the key columns in a @@ -85,12 +88,168 @@ public int getKeysFixedSize() { * @throws HiveException */ public void evaluateBatch(VectorizedRowBatch batch) throws HiveException { - for(int i = 0; i < keyExpressions.length; ++i) { - keyExpressions[i].evaluate(batch); + + for(int i=0;i= 0) { - return kw.getIsLongNull(klh.longIndex) ? null : - keyOutputWriter.writeValue(kw.getLongValue(klh.longIndex)); - } else if (klh.doubleIndex >= 0) { - return kw.getIsDoubleNull(klh.doubleIndex) ? null : - keyOutputWriter.writeValue(kw.getDoubleValue(klh.doubleIndex)); - } else if (klh.stringIndex >= 0) { - return kw.getIsBytesNull(klh.stringIndex) ? null : - keyOutputWriter.writeValue( - kw.getBytes(klh.stringIndex), - kw.getByteStart(klh.stringIndex), - kw.getByteLength(klh.stringIndex)); - } else if (klh.decimalIndex >= 0) { - return kw.getIsDecimalNull(klh.decimalIndex)? null : - keyOutputWriter.writeValue( - kw.getDecimal(klh.decimalIndex)); - } else if (klh.timestampIndex >= 0) { - return kw.getIsTimestampNull(klh.timestampIndex)? 
null : - keyOutputWriter.writeValue( - kw.getTimestamp(klh.timestampIndex)); - } else if (klh.intervalDayTimeIndex >= 0) { - return kw.getIsIntervalDayTimeNull(klh.intervalDayTimeIndex)? null : - keyOutputWriter.writeValue( - kw.getIntervalDayTime(klh.intervalDayTimeIndex)); - } else { - throw new HiveException(String.format( - "Internal inconsistent KeyLookupHelper at index [%d]:%d %d %d %d %d %d", - i, klh.longIndex, klh.doubleIndex, klh.stringIndex, klh.decimalIndex, - klh.timestampIndex, klh.intervalDayTimeIndex)); + if (kw.isNull(keyIndex)) { + return null; + } + + ColumnVector.Type columnVectorType = columnVectorTypes[keyIndex]; + int columnTypeSpecificIndex = columnTypeSpecificIndices[keyIndex]; + + switch (columnVectorType) { + case LONG: + return keyOutputWriter.writeValue( + kw.getLongValue(columnTypeSpecificIndex)); + case DOUBLE: + return keyOutputWriter.writeValue( + kw.getDoubleValue(columnTypeSpecificIndex)); + case BYTES: + return keyOutputWriter.writeValue( + kw.getBytes(columnTypeSpecificIndex), + kw.getByteStart(columnTypeSpecificIndex), + kw.getByteLength(columnTypeSpecificIndex)); + case DECIMAL: + return keyOutputWriter.writeValue( + kw.getDecimal(columnTypeSpecificIndex)); + case TIMESTAMP: + return keyOutputWriter.writeValue( + kw.getTimestamp(columnTypeSpecificIndex)); + case INTERVAL_DAY_TIME: + return keyOutputWriter.writeValue( + kw.getIntervalDayTime(columnTypeSpecificIndex)); + default: + throw new HiveException("Unexpected column vector type " + columnVectorType); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java index 848fc8e..4e05fa3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java @@ -215,6 +215,9 @@ public void process(Object row, int tag) throws HiveException { } } + for (VectorExpression ve : keyExpressions) { + ve.evaluate(inBatch); + } keyWrapperBatch.evaluateBatch(inBatch); keyValues = keyWrapperBatch.getVectorHashKeyWrappers(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java index ac3363e..f8c4223 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java @@ -257,6 +257,9 @@ public void process(Object row, int tag) throws HiveException { } } + for (VectorExpression ve : keyExpressions) { + ve.evaluate(inBatch); + } keyWrapperBatch.evaluateBatch(inBatch); keyValues = keyWrapperBatch.getVectorHashKeyWrappers(); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index e3d9d7f..cba0424 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -1622,14 +1622,6 @@ private boolean validateGroupByOperator(GroupByOperator op, boolean isReduce, bo GroupByDesc desc = op.getConf(); VectorGroupByDesc vectorDesc = desc.getVectorDesc(); - if (desc.isGroupingSetsPresent()) { - LOG.info("Grouping sets not supported in vector mode"); - return false; - } - if (desc.pruneGroupingSetId()) { - LOG.info("Pruning grouping set id not supported in vector mode"); - return false; - } if 
(desc.getMode() != GroupByDesc.Mode.HASH && desc.isDistinct()) { LOG.info("DISTINCT not supported in vector mode"); return false; @@ -1940,6 +1932,10 @@ private boolean validateAggregationIsPrimitive(VectorAggregateExpression vectorA LOG.info("Cannot vectorize groupby aggregate expression: UDF " + udfName + " not supported"); return new Pair(false, false); } + if (aggDesc.getDistinct()) { + LOG.info("Cannot vectorize groupby aggregate expression: DISTINCT"); + return new Pair(false, false); + } if (aggDesc.getParameters() != null && !validateExprNodeDesc(aggDesc.getParameters())) { LOG.info("Cannot vectorize groupby aggregate expression: UDF parameters not supported"); return new Pair(false, false); diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java index 99791e5..113de86 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java @@ -265,6 +265,11 @@ public boolean isDistinctLike() { return true; } + @Explain(displayName = "grouping sets") + public List getDisplayGroupingSets() { + return (groupingSetsPresent ? listGroupingSets : null); + } + // Consider a query like: // select a, b, count(distinct c) from T group by a,b with rollup; // Assume that hive.map.aggr is set to true and hive.groupby.skewindata is false, diff --git ql/src/test/queries/clientpositive/groupby_grouping_id1.q ql/src/test/queries/clientpositive/groupby_grouping_id1.q index d43ea37..9948ce9 100644 --- ql/src/test/queries/clientpositive/groupby_grouping_id1.q +++ ql/src/test/queries/clientpositive/groupby_grouping_id1.q @@ -2,6 +2,8 @@ CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE; LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1; +-- SORT_QUERY_RESULTS + SELECT key, val, GROUPING__ID from T1 group by key, val with cube; SELECT key, val, GROUPING__ID from T1 group by cube(key, val); diff --git ql/src/test/queries/clientpositive/groupby_grouping_sets1.q ql/src/test/queries/clientpositive/groupby_grouping_sets1.q index e239a87..4fcfd3b 100644 --- ql/src/test/queries/clientpositive/groupby_grouping_sets1.q +++ ql/src/test/queries/clientpositive/groupby_grouping_sets1.q @@ -1,3 +1,6 @@ + +-- SORT_QUERY_RESULTS + CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE; LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1; diff --git ql/src/test/queries/clientpositive/groupby_grouping_sets2.q ql/src/test/queries/clientpositive/groupby_grouping_sets2.q index b470964..af5bbe6 100644 --- ql/src/test/queries/clientpositive/groupby_grouping_sets2.q +++ ql/src/test/queries/clientpositive/groupby_grouping_sets2.q @@ -1,6 +1,8 @@ set hive.mapred.mode=nonstrict; set hive.new.job.grouping.set.cardinality=2; +-- SORT_QUERY_RESULTS + CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE; LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1; diff --git ql/src/test/queries/clientpositive/groupby_grouping_sets3.q ql/src/test/queries/clientpositive/groupby_grouping_sets3.q index 3c1a5e7..ef4a7aa 100644 --- ql/src/test/queries/clientpositive/groupby_grouping_sets3.q +++ ql/src/test/queries/clientpositive/groupby_grouping_sets3.q @@ -1,3 +1,6 @@ + +-- SORT_QUERY_RESULTS + -- In this test, 2 files are loaded into table T1. 
The data contains rows with the same value of a and b, -- with different number of rows for a and b in each file. Since bucketizedHiveInputFormat is used, -- this tests that the aggregate function stores the partial aggregate state correctly even if an diff --git ql/src/test/queries/clientpositive/groupby_grouping_sets5.q ql/src/test/queries/clientpositive/groupby_grouping_sets5.q index c1c98b3..570d464 100644 --- ql/src/test/queries/clientpositive/groupby_grouping_sets5.q +++ ql/src/test/queries/clientpositive/groupby_grouping_sets5.q @@ -7,6 +7,8 @@ CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMIN LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1; +-- SORT_QUERY_RESULTS + -- This tests that cubes and rollups work fine where the source is a sub-query EXPLAIN SELECT a, b, count(*) FROM diff --git ql/src/test/queries/clientpositive/groupby_grouping_sets6.q ql/src/test/queries/clientpositive/groupby_grouping_sets6.q index 5cdb4a5..e537bce 100644 --- ql/src/test/queries/clientpositive/groupby_grouping_sets6.q +++ ql/src/test/queries/clientpositive/groupby_grouping_sets6.q @@ -3,6 +3,8 @@ CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMIN LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1; +-- SORT_QUERY_RESULTS + set hive.optimize.ppd = false; -- This filter is not pushed down diff --git ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q index 1b753e1..12d2a56 100644 --- ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q +++ ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q @@ -1,3 +1,6 @@ + +-- SORT_QUERY_RESULTS + CREATE TABLE T1(key INT, value INT) STORED AS TEXTFILE; LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1; diff --git ql/src/test/queries/clientpositive/groupby_grouping_sets_limit.q ql/src/test/queries/clientpositive/groupby_grouping_sets_limit.q index db88d5f..b6c5143 100644 --- ql/src/test/queries/clientpositive/groupby_grouping_sets_limit.q +++ ql/src/test/queries/clientpositive/groupby_grouping_sets_limit.q @@ -1,3 +1,6 @@ + +-- SORT_QUERY_RESULTS + CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE; LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1; diff --git ql/src/test/queries/clientpositive/vector_groupby_cube1.q ql/src/test/queries/clientpositive/vector_groupby_cube1.q new file mode 100644 index 0000000..fd2f0de --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_cube1.q @@ -0,0 +1,55 @@ +set hive.mapred.mode=nonstrict; +set hive.map.aggr=true; +set hive.groupby.skewindata=false; + +-- SORT_QUERY_RESULTS + +CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1; + +EXPLAIN +SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube; +EXPLAIN +SELECT key, val, count(1) FROM T1 GROUP BY CUBE(key, val); + +SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube; + +EXPLAIN +SELECT key, val, GROUPING__ID, count(1) FROM T1 GROUP BY key, val with cube; + +SELECT key, val, GROUPING__ID, count(1) FROM T1 GROUP BY key, val with cube; + +EXPLAIN +SELECT key, count(distinct val) FROM T1 GROUP BY key with cube; + +SELECT key, count(distinct val) FROM T1 GROUP BY key with cube; + +set hive.groupby.skewindata=true; + +EXPLAIN +SELECT key, val, count(1) 
FROM T1 GROUP BY key, val with cube; + +SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube; + +EXPLAIN +SELECT key, count(distinct val) FROM T1 GROUP BY key with cube; + +SELECT key, count(distinct val) FROM T1 GROUP BY key with cube; + + +set hive.multigroupby.singlereducer=true; + +CREATE TABLE T2(key1 STRING, key2 STRING, val INT) STORED AS TEXTFILE; +CREATE TABLE T3(key1 STRING, key2 STRING, val INT) STORED AS TEXTFILE; + +EXPLAIN +FROM T1 +INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with cube +INSERT OVERWRITE TABLE T3 SELECT key, val, sum(1) group by key, val with cube; + + +FROM T1 +INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with cube +INSERT OVERWRITE TABLE T3 SELECT key, val, sum(1) group by key, val with cube; + diff --git ql/src/test/queries/clientpositive/vector_groupby_grouping_id1.q ql/src/test/queries/clientpositive/vector_groupby_grouping_id1.q new file mode 100644 index 0000000..2c9bd3d --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_grouping_id1.q @@ -0,0 +1,22 @@ +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; + +CREATE TABLE T1_text(key STRING, val STRING) STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1_text; + +CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text; + +-- SORT_QUERY_RESULTS + +SELECT key, val, GROUPING__ID from T1 group by key, val with cube; +SELECT key, val, GROUPING__ID from T1 group by cube(key, val); + +SELECT GROUPING__ID, key, val from T1 group by key, val with rollup; +SELECT GROUPING__ID, key, val from T1 group by rollup (key, val); + +SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN "0" WHEN GROUPING__ID == 1 THEN "1" WHEN GROUPING__ID == 2 THEN "2" WHEN GROUPING__ID == 3 THEN "3" ELSE "nothing" END from T1 group by key, val with cube; +SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN "0" WHEN GROUPING__ID == 1 THEN "1" WHEN GROUPING__ID == 2 THEN "2" WHEN GROUPING__ID == 3 THEN "3" ELSE "nothing" END from T1 group by cube(key, val); + diff --git ql/src/test/queries/clientpositive/vector_groupby_grouping_id2.q ql/src/test/queries/clientpositive/vector_groupby_grouping_id2.q new file mode 100644 index 0000000..ebebc2e --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_grouping_id2.q @@ -0,0 +1,64 @@ +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; + +CREATE TABLE T1_text(key INT, value INT) STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1_text; + +CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text; + +set hive.groupby.skewindata = true; + +-- SORT_QUERY_RESULTS + +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP; +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY ROLLUP (key, value); + +SELECT GROUPING__ID, count(*) +FROM +( +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +) t +GROUP BY GROUPING__ID; + +SELECT GROUPING__ID, count(*) +FROM +( +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY ROLLUP(key, value) +) t +GROUP BY GROUPING__ID; + + +SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY key,value WITH ROLLUP) t1 +JOIN +(SELECT GROUPING__ID FROM T1 GROUP BY key, 
value WITH ROLLUP) t2 +ON t1.GROUPING__ID = t2.GROUPING__ID; + +SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY ROLLUP(key,value)) t1 +JOIN +(SELECT GROUPING__ID FROM T1 GROUP BY ROLLUP(key, value)) t2 +ON t1.GROUPING__ID = t2.GROUPING__ID; + + + + + +set hive.groupby.skewindata = false; + +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP; + +SELECT GROUPING__ID, count(*) +FROM +( +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +) t +GROUP BY GROUPING__ID; + +SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY key,value WITH ROLLUP) t1 +JOIN +(SELECT GROUPING__ID FROM T1 GROUP BY key, value WITH ROLLUP) t2 +ON t1.GROUPING__ID = t2.GROUPING__ID; + + diff --git ql/src/test/queries/clientpositive/vector_groupby_grouping_id3.q ql/src/test/queries/clientpositive/vector_groupby_grouping_id3.q new file mode 100644 index 0000000..29e9211 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_grouping_id3.q @@ -0,0 +1,42 @@ +set hive.explain.user=false; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; + +CREATE TABLE T1_text(key INT, value INT) STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1_text; + +CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text; + +set hive.cbo.enable = false; + +-- SORT_QUERY_RESULTS + +EXPLAIN +SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1; +SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1; + +set hive.cbo.enable = true; + +EXPLAIN +SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1; +SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1; + diff --git ql/src/test/queries/clientpositive/vector_groupby_grouping_sets1.q ql/src/test/queries/clientpositive/vector_groupby_grouping_sets1.q new file mode 100644 index 0000000..f8561fa --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_grouping_sets1.q @@ -0,0 +1,29 @@ +set hive.explain.user=false; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; + +-- SORT_QUERY_RESULTS + +CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text; + +CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text; + +SELECT * FROM T1; + +SELECT a, b, count(*) from T1 group by a, b with cube; +SELECT a, b, count(*) from T1 group by cube(a, b); + +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()); + +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)); + +SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c); + +SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)); + +SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b); + diff --git ql/src/test/queries/clientpositive/vector_groupby_grouping_sets2.q ql/src/test/queries/clientpositive/vector_groupby_grouping_sets2.q new file mode 100644 index 0000000..158612c --- 
/dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_grouping_sets2.q @@ -0,0 +1,36 @@ +set hive.explain.user=false; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; +set hive.mapred.mode=nonstrict; +set hive.new.job.grouping.set.cardinality=2; + +-- SORT_QUERY_RESULTS + +CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text; + +CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text; + +-- Since 4 grouping sets would be generated for the query below, an additional MR job should be created +EXPLAIN +SELECT a, b, count(*) from T1 group by a, b with cube; + +EXPLAIN +SELECT a, b, count(*) from T1 group by cube(a, b); +SELECT a, b, count(*) from T1 group by a, b with cube; + +EXPLAIN +SELECT a, b, sum(c) from T1 group by a, b with cube; +SELECT a, b, sum(c) from T1 group by a, b with cube; + +CREATE TABLE T2(a STRING, b STRING, c int, d int) STORED AS ORC; + +INSERT OVERWRITE TABLE T2 +SELECT a, b, c, c from T1; + +EXPLAIN +SELECT a, b, sum(c+d) from T2 group by a, b with cube; +SELECT a, b, sum(c+d) from T2 group by a, b with cube; diff --git ql/src/test/queries/clientpositive/vector_groupby_grouping_sets3.q ql/src/test/queries/clientpositive/vector_groupby_grouping_sets3.q new file mode 100644 index 0000000..d299279 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_grouping_sets3.q @@ -0,0 +1,42 @@ +set hive.explain.user=false; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; +set hive.mapred.mode=nonstrict; +set hive.new.job.grouping.set.cardinality=2; + +-- SORT_QUERY_RESULTS + +-- In this test, 2 files are loaded into table T1. The data contains rows with the same value of a and b, +-- with different number of rows for a and b in each file. Since bucketizedHiveInputFormat is used, +-- this tests that the aggregate function stores the partial aggregate state correctly even if an +-- additional MR job is created for processing the grouping sets. +CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/grouping_sets1.txt' INTO TABLE T1_text; +LOAD DATA LOCAL INPATH '../../data/files/grouping_sets2.txt' INTO TABLE T1_text; + +CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text; + +set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; +set hive.new.job.grouping.set.cardinality = 30; + +-- The query below will execute in a single MR job, since 4 rows are generated per input row +-- (cube of a,b will lead to (a,b), (a, null), (null, b) and (null, null) and +-- hive.new.job.grouping.set.cardinality is more than 4. +EXPLAIN +SELECT a, b, avg(c), count(*) from T1 group by a, b with cube; + +EXPLAIN +SELECT a, b, avg(c), count(*) from T1 group by cube(a, b); +SELECT a, b, avg(c), count(*) from T1 group by a, b with cube; + +set hive.new.job.grouping.set.cardinality=2; + +-- The query below will execute in 2 MR jobs, since hive.new.job.grouping.set.cardinality is set to 2. +-- The partial aggregation state should be maintained correctly across MR jobs. 
+EXPLAIN +SELECT a, b, avg(c), count(*) from T1 group by a, b with cube; +SELECT a, b, avg(c), count(*) from T1 group by a, b with cube; + diff --git ql/src/test/queries/clientpositive/vector_groupby_grouping_sets4.q ql/src/test/queries/clientpositive/vector_groupby_grouping_sets4.q new file mode 100644 index 0000000..ef0d832 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_grouping_sets4.q @@ -0,0 +1,57 @@ +set hive.explain.user=false; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; +set hive.mapred.mode=nonstrict; +set hive.merge.mapfiles = false; +set hive.merge.mapredfiles = false; + +-- SORT_QUERY_RESULTS + +-- Set merging to false above to make the explain more readable + +CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text; + +CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text; + +-- This tests that cubes and rollups work fine inside sub-queries. +EXPLAIN +SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 +on subq1.a = subq2.a; + +EXPLAIN +SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by cube(a, b) ) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by cube(a, b) ) subq2 +on subq1.a = subq2.a; + +SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 +on subq1.a = subq2.a; + +set hive.new.job.grouping.set.cardinality=2; + +-- Since 4 grouping sets would be generated for each sub-query, an additional MR job should be created +-- for each of them +EXPLAIN +SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 +on subq1.a = subq2.a; + +SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 +on subq1.a = subq2.a; + diff --git ql/src/test/queries/clientpositive/vector_groupby_grouping_sets5.q ql/src/test/queries/clientpositive/vector_groupby_grouping_sets5.q new file mode 100644 index 0000000..15be3f3 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_grouping_sets5.q @@ -0,0 +1,39 @@ +set hive.explain.user=false; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; +set hive.mapred.mode=nonstrict; +set hive.merge.mapfiles = false; +set hive.merge.mapredfiles = false; +-- Set merging to false above to make the explain more readable + +CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text; + +CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text; + +-- SORT_QUERY_RESULTS + +-- This tests that cubes and rollups work fine where the source is a sub-query +EXPLAIN +SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube; + +EXPLAIN +SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, 
b) subq1 group by cube(a, b);
+
+SELECT a, b, count(*) FROM
+(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube;
+
+set hive.new.job.grouping.set.cardinality=2;
+
+-- Since 4 grouping sets would be generated for the cube, an additional MR job should be created
+EXPLAIN
+SELECT a, b, count(*) FROM
+(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube;
+
+SELECT a, b, count(*) FROM
+(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube;
diff --git ql/src/test/queries/clientpositive/vector_groupby_grouping_sets6.q ql/src/test/queries/clientpositive/vector_groupby_grouping_sets6.q
new file mode 100644
index 0000000..72c2078
--- /dev/null
+++ ql/src/test/queries/clientpositive/vector_groupby_grouping_sets6.q
@@ -0,0 +1,38 @@
+set hive.explain.user=false;
+SET hive.vectorized.execution.enabled=true;
+SET hive.vectorized.execution.reduce.enabled=true;
+set hive.fetch.task.conversion=none;
+set hive.cli.print.header=true;
+set hive.mapred.mode=nonstrict;
+
+CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text;
+
+CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text;
+
+-- SORT_QUERY_RESULTS
+
+set hive.optimize.ppd = false;
+
+-- This filter is not pushed down
+EXPLAIN
+SELECT a, b FROM
+(SELECT a, b from T1 group by a, b grouping sets ( (a,b),a )) res
+WHERE res.a=5;
+
+SELECT a, b FROM
+(SELECT a, b from T1 group by a, b grouping sets ( (a,b),a )) res
+WHERE res.a=5;
+
+set hive.cbo.enable = true;
+
+-- This filter is pushed down through aggregate with grouping sets by Calcite
+EXPLAIN
+SELECT a, b FROM
+(SELECT a, b from T1 group by a, b grouping sets ( (a,b),a )) res
+WHERE res.a=5;
+
+SELECT a, b FROM
+(SELECT a, b from T1 group by a, b grouping sets ( (a,b),a )) res
+WHERE res.a=5;
diff --git ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_grouping.q ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_grouping.q
new file mode 100644
index 0000000..7b7c892
--- /dev/null
+++ ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_grouping.q
@@ -0,0 +1,99 @@
+set hive.explain.user=false;
+SET hive.vectorized.execution.enabled=true;
+SET hive.vectorized.execution.reduce.enabled=true;
+set hive.fetch.task.conversion=none;
+set hive.cli.print.header=true;
+
+CREATE TABLE T1_text(key INT, value INT) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1_text;
+
+CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text;
+
+-- SORT_QUERY_RESULTS
+
+explain
+select key, value, `grouping__id`, grouping(key), grouping(value)
+from T1
+group by rollup(key, value);
+
+select key, value, `grouping__id`, grouping(key), grouping(value)
+from T1
+group by rollup(key, value);
+
+explain
+select key, value, `grouping__id`, grouping(key), grouping(value)
+from T1
+group by cube(key, value);
+
+select key, value, `grouping__id`, grouping(key), grouping(value)
+from T1
+group by cube(key, value);
+
+explain
+select key, value
+from T1
+group by cube(key, value)
+having grouping(key) = 1;
+
+select key, value
+from T1
+group by cube(key, value)
+having grouping(key) = 1;
+
+explain
+select key, value, grouping(key)+grouping(value) as x
+from T1
+group by cube(key, value)
+having grouping(key) = 1 OR grouping(value) = 1
+order by x desc, case when x = 1 then key end;
+
+select key, value, grouping(key)+grouping(value) as x
+from T1
+group by cube(key, value)
+having grouping(key) = 1 OR grouping(value) = 1
+order by x desc, case when x = 1 then key end;
+
+set hive.cbo.enable=false;
+
+explain
+select key, value, `grouping__id`, grouping(key), grouping(value)
+from T1
+group by rollup(key, value);
+
+select key, value, `grouping__id`, grouping(key), grouping(value)
+from T1
+group by rollup(key, value);
+
+explain
+select key, value, `grouping__id`, grouping(key), grouping(value)
+from T1
+group by cube(key, value);
+
+select key, value, `grouping__id`, grouping(key), grouping(value)
+from T1
+group by cube(key, value);
+
+explain
+select key, value
+from T1
+group by cube(key, value)
+having grouping(key) = 1;
+
+select key, value
+from T1
+group by cube(key, value)
+having grouping(key) = 1;
+
+explain
+select key, value, grouping(key)+grouping(value) as x
+from T1
+group by cube(key, value)
+having grouping(key) = 1 OR grouping(value) = 1
+order by x desc, case when x = 1 then key end;
+
+select key, value, grouping(key)+grouping(value) as x
+from T1
+group by cube(key, value)
+having grouping(key) = 1 OR grouping(value) = 1
+order by x desc, case when x = 1 then key end;
diff --git ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_limit.q ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_limit.q
new file mode 100644
index 0000000..00649f7
--- /dev/null
+++ ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_limit.q
@@ -0,0 +1,42 @@
+SET hive.vectorized.execution.enabled=true;
+SET hive.vectorized.execution.reduce.enabled=true;
+set hive.fetch.task.conversion=none;
+set hive.cli.print.header=true;
+
+CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text;
+
+CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text;
+
+-- SORT_QUERY_RESULTS
+
+EXPLAIN
+SELECT a, b, count(*) from T1 group by a, b with cube LIMIT 10;
+
+SELECT a, b, count(*) from T1 group by a, b with cube LIMIT 10;
+
+EXPLAIN
+SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()) LIMIT 10;
+
+SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()) LIMIT 10;
+
+EXPLAIN
+SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)) LIMIT 10;
+
+SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)) LIMIT 10;
+
+EXPLAIN
+SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) LIMIT 10;
+
+SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) LIMIT 10;
+
+EXPLAIN
+SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)) LIMIT 10;
+
+SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)) LIMIT 10;
+
+EXPLAIN
+SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b) LIMIT 10;
+
+SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b) LIMIT 10;
diff --git ql/src/test/queries/clientpositive/vector_groupby_grouping_window.q ql/src/test/queries/clientpositive/vector_groupby_grouping_window.q
new file mode 100644
index 0000000..7d75433
--- /dev/null
+++ ql/src/test/queries/clientpositive/vector_groupby_grouping_window.q
@@ -0,0 +1,20 @@
+SET hive.vectorized.execution.enabled=true;
+SET hive.vectorized.execution.reduce.enabled=true;
+set hive.fetch.task.conversion=none;
+set hive.cli.print.header=true;
+
+create table t(category int, live int, comments int) stored as orc;
+insert into table t select key, 0, 2 from src tablesample(3 rows);
+
+explain
+select category, max(live) live,
max(comments) comments, rank() OVER (PARTITION BY category ORDER BY comments) rank1 +FROM t +GROUP BY category +GROUPING SETS ((), (category)) +HAVING max(comments) > 0; + +select category, max(live) live, max(comments) comments, rank() OVER (PARTITION BY category ORDER BY comments) rank1 +FROM t +GROUP BY category +GROUPING SETS ((), (category)) +HAVING max(comments) > 0; diff --git ql/src/test/results/clientpositive/annotate_stats_groupby.q.out ql/src/test/results/clientpositive/annotate_stats_groupby.q.out index a8e4854..d2a7a1e 100644 --- ql/src/test/results/clientpositive/annotate_stats_groupby.q.out +++ ql/src/test/results/clientpositive/annotate_stats_groupby.q.out @@ -300,6 +300,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + grouping sets: 0, 1, 2, 3 keys: state (type: string), locid (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -350,6 +351,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + grouping sets: 0, 1, 3 keys: state (type: string), locid (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -400,6 +402,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + grouping sets: 0, 1, 3 keys: state (type: string), locid (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -450,6 +453,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + grouping sets: 1 keys: state (type: string), locid (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -500,6 +504,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + grouping sets: 1, 2 keys: state (type: string), locid (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -550,6 +555,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + grouping sets: 1, 2, 0 keys: state (type: string), locid (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -600,6 +606,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + grouping sets: 0, 1, 2, 3 keys: state (type: string), locid (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -699,6 +706,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + grouping sets: 0, 1, 2, 3 keys: state (type: string), locid (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -798,6 +806,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 0, 1, 2, 3 keys: state (type: string), locid (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -848,6 +857,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE 
Group By Operator + grouping sets: 0, 1, 3 keys: state (type: string), locid (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -898,6 +908,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 0, 1, 3 keys: state (type: string), locid (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -948,6 +959,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 1 keys: state (type: string), locid (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -998,6 +1010,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 1, 2 keys: state (type: string), locid (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -1048,6 +1061,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 1, 2, 0 keys: state (type: string), locid (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -1098,6 +1112,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 0, 1, 2, 3 keys: state (type: string), locid (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -1197,6 +1212,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 0, 1, 2, 3 keys: state (type: string), locid (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 diff --git ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out index 31c4ed1..74c4d53 100644 --- ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out +++ ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out @@ -103,6 +103,7 @@ STAGE PLANS: outputColumnNames: state, country Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 0, 1, 2, 3 keys: state (type: string), country (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -251,6 +252,7 @@ STAGE PLANS: outputColumnNames: state, country Statistics: Num rows: 20 Data size: 3460 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + grouping sets: 0, 1, 2, 3 keys: state (type: string), country (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -350,6 +352,7 @@ STAGE PLANS: outputColumnNames: state, country Statistics: Num rows: 20 Data size: 3460 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + grouping sets: 0, 1, 2, 3 keys: state (type: string), country (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 diff --git ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out index f260f03..b87a9be 100644 --- ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out +++ ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out @@ -300,6 +300,7 @@ 
STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + grouping sets: 0, 1, 2, 3 keys: state (type: string), locid (type: int), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -353,6 +354,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + grouping sets: 0, 1, 3 keys: state (type: string), locid (type: int), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -406,6 +408,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + grouping sets: 0, 1, 3 keys: state (type: string), locid (type: int), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -459,6 +462,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + grouping sets: 1 keys: state (type: string), locid (type: int), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -512,6 +516,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + grouping sets: 1, 2 keys: state (type: string), locid (type: int), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -565,6 +570,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + grouping sets: 0, 1, 2 keys: state (type: string), locid (type: int), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -618,6 +624,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + grouping sets: 0, 1, 2, 3 keys: state (type: string), locid (type: int), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -720,6 +727,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + grouping sets: 0, 1, 2, 3 keys: state (type: string), locid (type: int), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -822,6 +830,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 0, 1, 2, 3 keys: state (type: string), locid (type: int), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -875,6 +884,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 0, 1, 3 keys: state (type: string), locid (type: int), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -928,6 +938,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 0, 1, 3 keys: state (type: string), locid (type: int), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -981,6 +992,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 1 keys: state 
(type: string), locid (type: int), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -1034,6 +1046,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 1, 2 keys: state (type: string), locid (type: int), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -1087,6 +1100,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 0, 1, 2 keys: state (type: string), locid (type: int), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -1140,6 +1154,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 0, 1, 2, 3 keys: state (type: string), locid (type: int), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -1242,6 +1257,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 0, 1, 2, 3 keys: state (type: string), locid (type: int), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 diff --git ql/src/test/results/clientpositive/groupby_cube1.q.out ql/src/test/results/clientpositive/groupby_cube1.q.out index 0486b68..6afc9e3 100644 --- ql/src/test/results/clientpositive/groupby_cube1.q.out +++ ql/src/test/results/clientpositive/groupby_cube1.q.out @@ -37,6 +37,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) + grouping sets: 0, 1, 2, 3 keys: _col0 (type: string), _col1 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -96,6 +97,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) + grouping sets: 0, 1, 2, 3 keys: _col0 (type: string), _col1 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -181,6 +183,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) + grouping sets: 0, 1, 2, 3 keys: _col0 (type: string), _col1 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -265,6 +268,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT val) + grouping sets: 0, 1 keys: key (type: string), 0 (type: int), val (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -338,6 +342,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) + grouping sets: 0, 1, 2, 3 keys: _col0 (type: string), _col1 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -448,6 +453,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT val) + grouping sets: 0, 1 keys: key (type: string), 0 (type: int), val (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -570,6 +576,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: 
count(1) + grouping sets: 0, 1, 2, 3 keys: key (type: string), val (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -586,6 +593,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(1) + grouping sets: 0, 1, 2, 3 keys: key (type: string), val (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 diff --git ql/src/test/results/clientpositive/groupby_cube_multi_gby.q.out ql/src/test/results/clientpositive/groupby_cube_multi_gby.q.out index f6e1b17..f720f10 100644 --- ql/src/test/results/clientpositive/groupby_cube_multi_gby.q.out +++ ql/src/test/results/clientpositive/groupby_cube_multi_gby.q.out @@ -50,6 +50,7 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 0, 1, 2, 3 keys: key (type: string), value (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -64,6 +65,7 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 1, 3 keys: key (type: string), value (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 diff --git ql/src/test/results/clientpositive/groupby_grouping_id1.q.out ql/src/test/results/clientpositive/groupby_grouping_id1.q.out index 9ef7615..e390535 100644 --- ql/src/test/results/clientpositive/groupby_grouping_id1.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_id1.q.out @@ -22,24 +22,24 @@ POSTHOOK: query: SELECT key, val, GROUPING__ID from T1 group by key, val with cu POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 0 -NULL 11 2 -NULL 12 2 -NULL 13 2 -NULL 17 2 -NULL 18 2 -NULL 28 2 -1 NULL 1 1 11 3 -2 NULL 1 +1 NULL 1 2 12 3 -3 NULL 1 +2 NULL 1 3 13 3 -7 NULL 1 +3 NULL 1 7 17 3 -8 NULL 1 +7 NULL 1 8 18 3 8 28 3 +8 NULL 1 +NULL 11 2 +NULL 12 2 +NULL 13 2 +NULL 17 2 +NULL 18 2 +NULL 28 2 +NULL NULL 0 PREHOOK: query: SELECT key, val, GROUPING__ID from T1 group by cube(key, val) PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -48,24 +48,24 @@ POSTHOOK: query: SELECT key, val, GROUPING__ID from T1 group by cube(key, val) POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 0 -NULL 11 2 -NULL 12 2 -NULL 13 2 -NULL 17 2 -NULL 18 2 -NULL 28 2 -1 NULL 1 1 11 3 -2 NULL 1 +1 NULL 1 2 12 3 -3 NULL 1 +2 NULL 1 3 13 3 -7 NULL 1 +3 NULL 1 7 17 3 -8 NULL 1 +7 NULL 1 8 18 3 8 28 3 +8 NULL 1 +NULL 11 2 +NULL 12 2 +NULL 13 2 +NULL 17 2 +NULL 18 2 +NULL 28 2 +NULL NULL 0 PREHOOK: query: SELECT GROUPING__ID, key, val from T1 group by key, val with rollup PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -76,14 +76,14 @@ POSTHOOK: Input: default@t1 #### A masked pattern was here #### 0 NULL NULL 1 1 NULL -3 1 11 1 2 NULL -3 2 12 1 3 NULL -3 3 13 1 7 NULL -3 7 17 1 8 NULL +3 1 11 +3 2 12 +3 3 13 +3 7 17 3 8 18 3 8 28 PREHOOK: query: SELECT GROUPING__ID, key, val from T1 group by rollup (key, val) @@ -96,14 +96,14 @@ POSTHOOK: Input: default@t1 #### A masked pattern was here #### 0 NULL NULL 1 1 NULL -3 1 11 1 2 NULL -3 2 12 1 3 NULL -3 3 13 1 7 NULL -3 7 17 1 8 NULL +3 1 11 +3 2 12 +3 3 13 +3 7 17 3 8 18 3 8 28 PREHOOK: query: SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN "0" WHEN GROUPING__ID == 1 THEN "1" WHEN GROUPING__ID == 2 THEN "2" WHEN 
GROUPING__ID == 3 THEN "3" ELSE "nothing" END from T1 group by key, val with cube @@ -114,24 +114,24 @@ POSTHOOK: query: SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 0 0 -NULL 11 2 2 -NULL 12 2 2 -NULL 13 2 2 -NULL 17 2 2 -NULL 18 2 2 -NULL 28 2 2 -1 NULL 1 1 1 11 3 3 -2 NULL 1 1 +1 NULL 1 1 2 12 3 3 -3 NULL 1 1 +2 NULL 1 1 3 13 3 3 -7 NULL 1 1 +3 NULL 1 1 7 17 3 3 -8 NULL 1 1 +7 NULL 1 1 8 18 3 3 8 28 3 3 +8 NULL 1 1 +NULL 11 2 2 +NULL 12 2 2 +NULL 13 2 2 +NULL 17 2 2 +NULL 18 2 2 +NULL 28 2 2 +NULL NULL 0 0 PREHOOK: query: SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN "0" WHEN GROUPING__ID == 1 THEN "1" WHEN GROUPING__ID == 2 THEN "2" WHEN GROUPING__ID == 3 THEN "3" ELSE "nothing" END from T1 group by cube(key, val) PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -140,21 +140,21 @@ POSTHOOK: query: SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 0 0 -NULL 11 2 2 -NULL 12 2 2 -NULL 13 2 2 -NULL 17 2 2 -NULL 18 2 2 -NULL 28 2 2 -1 NULL 1 1 1 11 3 3 -2 NULL 1 1 +1 NULL 1 1 2 12 3 3 -3 NULL 1 1 +2 NULL 1 1 3 13 3 3 -7 NULL 1 1 +3 NULL 1 1 7 17 3 3 -8 NULL 1 1 +7 NULL 1 1 8 18 3 3 8 28 3 3 +8 NULL 1 1 +NULL 11 2 2 +NULL 12 2 2 +NULL 13 2 2 +NULL 17 2 2 +NULL 18 2 2 +NULL 28 2 2 +NULL NULL 0 0 diff --git ql/src/test/results/clientpositive/groupby_grouping_sets1.q.out ql/src/test/results/clientpositive/groupby_grouping_sets1.q.out index aebba0d..c685313 100644 --- ql/src/test/results/clientpositive/groupby_grouping_sets1.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_sets1.q.out @@ -22,12 +22,12 @@ POSTHOOK: query: SELECT * FROM T1 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -8 1 1 -5 2 2 1 1 3 2 2 4 2 3 5 3 2 8 +5 2 2 +8 1 1 PREHOOK: query: SELECT a, b, count(*) from T1 group by a, b with cube PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -36,21 +36,21 @@ POSTHOOK: query: SELECT a, b, count(*) from T1 group by a, b with cube POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 6 -NULL 1 2 -NULL 2 3 -NULL 3 1 -1 NULL 1 1 1 1 -2 NULL 2 +1 NULL 1 2 2 1 2 3 1 -3 NULL 1 +2 NULL 2 3 2 1 -5 NULL 1 +3 NULL 1 5 2 1 -8 NULL 1 +5 NULL 1 8 1 1 +8 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 PREHOOK: query: SELECT a, b, count(*) from T1 group by cube(a, b) PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -59,21 +59,21 @@ POSTHOOK: query: SELECT a, b, count(*) from T1 group by cube(a, b) POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 6 -NULL 1 2 -NULL 2 3 -NULL 3 1 -1 NULL 1 1 1 1 -2 NULL 2 +1 NULL 1 2 2 1 2 3 1 -3 NULL 1 +2 NULL 2 3 2 1 -5 NULL 1 +3 NULL 1 5 2 1 -8 NULL 1 +5 NULL 1 8 1 1 +8 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 PREHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()) PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -82,21 +82,21 @@ POSTHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 6 -NULL 1 2 -NULL 2 3 -NULL 3 1 -1 NULL 1 1 1 1 -2 NULL 2 +1 NULL 1 2 2 1 2 3 1 -3 NULL 1 +2 NULL 2 3 2 1 -5 NULL 1 +3 NULL 1 5 2 1 -8 NULL 1 +5 NULL 1 8 1 1 +8 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 PREHOOK: query: SELECT 
a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)) PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -105,17 +105,17 @@ POSTHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, ( POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -1 NULL 1 1 1 1 -2 NULL 2 +1 NULL 1 2 2 1 2 3 1 -3 NULL 1 +2 NULL 2 3 2 1 -5 NULL 1 +3 NULL 1 5 2 1 -8 NULL 1 +5 NULL 1 8 1 1 +8 NULL 1 PREHOOK: query: SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -124,6 +124,11 @@ POSTHOOK: query: SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### +1 +2 +3 +5 +8 NULL NULL NULL @@ -133,11 +138,6 @@ NULL NULL NULL NULL -1 -2 -3 -5 -8 PREHOOK: query: SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)) PREHOOK: type: QUERY PREHOOK: Input: default@t1 diff --git ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out index b4f8ce7..4cd6d72 100644 --- ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out @@ -51,6 +51,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + grouping sets: 0, 1, 2, 3 keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 @@ -135,6 +136,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + grouping sets: 0, 1, 2, 3 keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 @@ -190,21 +192,21 @@ POSTHOOK: query: SELECT a, b, count(*) from T1 group by a, b with cube POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 6 -NULL 1 2 -NULL 2 3 -NULL 3 1 -1 NULL 1 1 1 1 -2 NULL 2 +1 NULL 1 2 2 1 2 3 1 -3 NULL 1 +2 NULL 2 3 2 1 -5 NULL 1 +3 NULL 1 5 2 1 -8 NULL 1 +5 NULL 1 8 1 1 +8 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 PREHOOK: query: EXPLAIN SELECT a, b, sum(c) from T1 group by a, b with cube PREHOOK: type: QUERY @@ -242,6 +244,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + grouping sets: 0, 1, 2, 3 keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 @@ -297,21 +300,21 @@ POSTHOOK: query: SELECT a, b, sum(c) from T1 group by a, b with cube POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 23.0 -NULL 1 4.0 -NULL 2 14.0 -NULL 3 5.0 -1 NULL 3.0 1 1 3.0 -2 NULL 9.0 +1 NULL 3.0 2 2 4.0 2 3 5.0 -3 NULL 8.0 +2 NULL 9.0 3 2 8.0 -5 NULL 2.0 +3 NULL 8.0 5 2 2.0 -8 NULL 1.0 +5 NULL 2.0 8 1 1.0 +8 NULL 1.0 +NULL 1 4.0 +NULL 2 14.0 +NULL 3 5.0 +NULL NULL 23.0 PREHOOK: query: CREATE TABLE T2(a STRING, b STRING, c int, d int) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default @@ -371,6 +374,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + grouping sets: 0, 1, 2, 3 keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 @@ -426,18 +430,18 @@ POSTHOOK: query: SELECT a, b, sum(c+d) from T2 group by a, b with cube POSTHOOK: type: QUERY POSTHOOK: Input: default@t2 #### A masked pattern was 
here #### -NULL NULL 46 -NULL 1 8 -NULL 2 28 -NULL 3 10 -1 NULL 6 1 1 6 -2 NULL 18 +1 NULL 6 2 2 8 2 3 10 -3 NULL 16 +2 NULL 18 3 2 16 -5 NULL 4 +3 NULL 16 5 2 4 -8 NULL 2 +5 NULL 4 8 1 2 +8 NULL 2 +NULL 1 8 +NULL 2 28 +NULL 3 10 +NULL NULL 46 diff --git ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out index 67cbdcd..5e42b82 100644 --- ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out @@ -45,6 +45,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(c), count() + grouping sets: 0, 1, 2, 3 keys: a (type: string), b (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -104,6 +105,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(c), count() + grouping sets: 0, 1, 2, 3 keys: a (type: string), b (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -148,22 +150,22 @@ POSTHOOK: query: SELECT a, b, avg(c), count(*) from T1 group by a, b with cube POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 3.8333333333333335 12 -NULL 1 2.0 5 -NULL 2 5.2 5 -NULL 3 5.0 2 -1 NULL 2.6666666666666665 3 1 1 3.0 2 1 2 2.0 1 -2 NULL 5.2 5 +1 NULL 2.6666666666666665 3 2 2 5.333333333333333 3 2 3 5.0 2 -3 NULL 8.0 1 +2 NULL 5.2 5 3 2 8.0 1 -5 NULL 2.0 1 +3 NULL 8.0 1 5 1 2.0 1 -8 NULL 1.0 2 +5 NULL 2.0 1 8 1 1.0 2 +8 NULL 1.0 2 +NULL 1 2.0 5 +NULL 2 5.2 5 +NULL 3 5.0 2 +NULL NULL 3.8333333333333335 12 PREHOOK: query: EXPLAIN SELECT a, b, avg(c), count(*) from T1 group by a, b with cube PREHOOK: type: QUERY @@ -201,6 +203,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), count(VALUE._col1) + grouping sets: 0, 1, 2, 3 keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) mode: partials outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -256,19 +259,19 @@ POSTHOOK: query: SELECT a, b, avg(c), count(*) from T1 group by a, b with cube POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 3.8333333333333335 12 -NULL 1 2.0 5 -NULL 2 5.2 5 -NULL 3 5.0 2 -1 NULL 2.6666666666666665 3 1 1 3.0 2 1 2 2.0 1 -2 NULL 5.2 5 +1 NULL 2.6666666666666665 3 2 2 5.333333333333333 3 2 3 5.0 2 -3 NULL 8.0 1 +2 NULL 5.2 5 3 2 8.0 1 -5 NULL 2.0 1 +3 NULL 8.0 1 5 1 2.0 1 -8 NULL 1.0 2 +5 NULL 2.0 1 8 1 1.0 2 +8 NULL 1.0 2 +NULL 1 2.0 5 +NULL 2 5.2 5 +NULL 3 5.0 2 +NULL NULL 3.8333333333333335 12 diff --git ql/src/test/results/clientpositive/groupby_grouping_sets4.q.out ql/src/test/results/clientpositive/groupby_grouping_sets4.q.out index 5884b54..a43fd9f 100644 --- ql/src/test/results/clientpositive/groupby_grouping_sets4.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_sets4.q.out @@ -46,6 +46,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + grouping sets: 0, 1, 2, 3 keys: a (type: string), b (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -123,6 +124,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + grouping sets: 0, 1, 2, 3 keys: a (type: string), b (type: 
string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -193,6 +195,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + grouping sets: 0, 1, 2, 3 keys: a (type: string), b (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -270,6 +273,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + grouping sets: 0, 1, 2, 3 keys: a (type: string), b (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -384,6 +388,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + grouping sets: 0, 1, 2, 3 keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 @@ -485,6 +490,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + grouping sets: 0, 1, 2, 3 keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 diff --git ql/src/test/results/clientpositive/groupby_grouping_sets5.q.out ql/src/test/results/clientpositive/groupby_grouping_sets5.q.out index 166f110..b19aa77 100644 --- ql/src/test/results/clientpositive/groupby_grouping_sets5.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_sets5.q.out @@ -56,6 +56,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + grouping sets: 0, 1, 2, 3 keys: _col0 (type: string), _col1 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -145,6 +146,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + grouping sets: 0, 1, 2, 3 keys: _col0 (type: string), _col1 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -202,21 +204,21 @@ POSTHOOK: query: SELECT a, b, count(*) FROM POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 6 -NULL 1 2 -NULL 2 3 -NULL 3 1 -1 NULL 1 1 1 1 -2 NULL 2 +1 NULL 1 2 2 1 2 3 1 -3 NULL 1 +2 NULL 2 3 2 1 -5 NULL 1 +3 NULL 1 5 2 1 -8 NULL 1 +5 NULL 1 8 1 1 +8 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 PREHOOK: query: EXPLAIN SELECT a, b, count(*) FROM (SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube @@ -284,6 +286,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + grouping sets: 0, 1, 2, 3 keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 @@ -341,18 +344,18 @@ POSTHOOK: query: SELECT a, b, count(*) FROM POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 6 -NULL 1 2 -NULL 2 3 -NULL 3 1 -1 NULL 1 1 1 1 -2 NULL 2 +1 NULL 1 2 2 1 2 3 1 -3 NULL 1 +2 NULL 2 3 2 1 -5 NULL 1 +3 NULL 1 5 2 1 -8 NULL 1 +5 NULL 1 8 1 1 +8 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 diff --git ql/src/test/results/clientpositive/groupby_grouping_sets6.q.out ql/src/test/results/clientpositive/groupby_grouping_sets6.q.out index 16f0871..6797e4a 100644 --- ql/src/test/results/clientpositive/groupby_grouping_sets6.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_sets6.q.out @@ 
-39,6 +39,7 @@ STAGE PLANS: predicate: (UDFToDouble(a) = 5.0) (type: boolean) Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 3, 1 keys: a (type: string), b (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -81,8 +82,8 @@ WHERE res.a=5 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -5 NULL 5 2 +5 NULL PREHOOK: query: EXPLAIN SELECT a, b FROM (SELECT a, b from T1 group by a, b grouping sets ( (a,b),a )) res @@ -108,6 +109,7 @@ STAGE PLANS: predicate: (UDFToDouble(a) = 5.0) (type: boolean) Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 3, 1 keys: a (type: string), b (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -150,5 +152,5 @@ WHERE res.a=5 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -5 NULL 5 2 +5 NULL diff --git ql/src/test/results/clientpositive/groupby_grouping_sets_grouping.q.out ql/src/test/results/clientpositive/groupby_grouping_sets_grouping.q.out index 62f40cd..f117d4d 100644 --- ql/src/test/results/clientpositive/groupby_grouping_sets_grouping.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_sets_grouping.q.out @@ -40,6 +40,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 0, 1, 3 keys: _col0 (type: int), _col1 (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -85,17 +86,17 @@ group by rollup(key, value) POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 0 0 0 +1 1 3 1 1 1 NULL 1 0 1 1 NULL 3 1 1 -1 1 3 1 1 -2 NULL 1 0 1 2 2 3 1 1 +2 NULL 1 0 1 +3 3 3 1 1 3 NULL 1 0 1 3 NULL 3 1 1 -3 3 3 1 1 -4 NULL 1 0 1 4 5 3 1 1 +4 NULL 1 0 1 +NULL NULL 0 0 0 PREHOOK: query: explain select key, value, `grouping__id`, grouping(key), grouping(value) from T1 @@ -122,6 +123,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 0, 1, 2, 3 keys: _col0 (type: int), _col1 (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -167,22 +169,22 @@ group by cube(key, value) POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 0 0 0 -NULL NULL 2 1 0 -NULL 1 2 1 0 -NULL 2 2 1 0 -NULL 3 2 1 0 -NULL 5 2 1 0 +1 1 3 1 1 1 NULL 1 0 1 1 NULL 3 1 1 -1 1 3 1 1 -2 NULL 1 0 1 2 2 3 1 1 +2 NULL 1 0 1 +3 3 3 1 1 3 NULL 1 0 1 3 NULL 3 1 1 -3 3 3 1 1 -4 NULL 1 0 1 4 5 3 1 1 +4 NULL 1 0 1 +NULL 1 2 1 0 +NULL 2 2 1 0 +NULL 3 2 1 0 +NULL 5 2 1 0 +NULL NULL 0 0 0 +NULL NULL 2 1 0 PREHOOK: query: explain select key, value from T1 @@ -211,6 +213,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 0, 1, 2, 3 keys: _col0 (type: int), _col1 (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -261,17 +264,17 @@ having grouping(key) = 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL -NULL 1 -NULL 2 -NULL 3 -NULL 5 -1 NULL 1 1 +1 NULL 2 2 -3 NULL 3 3 +3 NULL 4 5 +NULL 1 +NULL 2 +NULL 3 +NULL 5 +NULL NULL PREHOOK: query: explain select key, value, grouping(key)+grouping(value) as x from T1 @@ 
-303,6 +306,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 0, 1, 2, 3 keys: _col0 (type: int), _col1 (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -376,21 +380,21 @@ order by x desc, case when x = 1 then key end POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -4 5 2 -3 3 2 -3 NULL 2 -2 2 2 1 1 2 -1 NULL 2 -NULL 1 1 -NULL NULL 1 -NULL 5 1 -NULL 3 1 -NULL 2 1 1 NULL 1 +1 NULL 2 +2 2 2 2 NULL 1 +3 3 2 3 NULL 1 +3 NULL 2 +4 5 2 4 NULL 1 +NULL 1 1 +NULL 2 1 +NULL 3 1 +NULL 5 1 +NULL NULL 1 PREHOOK: query: explain select key, value, `grouping__id`, grouping(key), grouping(value) from T1 @@ -417,6 +421,7 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 0, 1, 3 keys: key (type: int), value (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -462,17 +467,17 @@ group by rollup(key, value) POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 0 0 0 +1 1 3 1 1 1 NULL 1 0 1 1 NULL 3 1 1 -1 1 3 1 1 -2 NULL 1 0 1 2 2 3 1 1 +2 NULL 1 0 1 +3 3 3 1 1 3 NULL 1 0 1 3 NULL 3 1 1 -3 3 3 1 1 -4 NULL 1 0 1 4 5 3 1 1 +4 NULL 1 0 1 +NULL NULL 0 0 0 PREHOOK: query: explain select key, value, `grouping__id`, grouping(key), grouping(value) from T1 @@ -499,6 +504,7 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 0, 1, 2, 3 keys: key (type: int), value (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -544,22 +550,22 @@ group by cube(key, value) POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 0 0 0 -NULL NULL 2 1 0 -NULL 1 2 1 0 -NULL 2 2 1 0 -NULL 3 2 1 0 -NULL 5 2 1 0 +1 1 3 1 1 1 NULL 1 0 1 1 NULL 3 1 1 -1 1 3 1 1 -2 NULL 1 0 1 2 2 3 1 1 +2 NULL 1 0 1 +3 3 3 1 1 3 NULL 1 0 1 3 NULL 3 1 1 -3 3 3 1 1 -4 NULL 1 0 1 4 5 3 1 1 +4 NULL 1 0 1 +NULL 1 2 1 0 +NULL 2 2 1 0 +NULL 3 2 1 0 +NULL 5 2 1 0 +NULL NULL 0 0 0 +NULL NULL 2 1 0 PREHOOK: query: explain select key, value from T1 @@ -588,6 +594,7 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 0, 1, 2, 3 keys: key (type: int), value (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -635,17 +642,17 @@ having grouping(key) = 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL -NULL 1 -NULL 2 -NULL 3 -NULL 5 -1 NULL 1 1 +1 NULL 2 2 -3 NULL 3 3 +3 NULL 4 5 +NULL 1 +NULL 2 +NULL 3 +NULL 5 +NULL NULL PREHOOK: query: explain select key, value, grouping(key)+grouping(value) as x from T1 @@ -677,6 +684,7 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 0, 1, 2, 3 keys: key (type: int), value (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -750,18 +758,18 @@ order by x desc, case when x = 1 then key end POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -4 5 2 -3 3 2 -3 NULL 2 -2 2 2 1 1 2 -1 NULL 2 -NULL 1 1 -NULL NULL 1 -NULL 5 1 -NULL 3 1 -NULL 2 1 1 NULL 1 +1 NULL 2 +2 
2 2 2 NULL 1 +3 3 2 3 NULL 1 +3 NULL 2 +4 5 2 4 NULL 1 +NULL 1 1 +NULL 2 1 +NULL 3 1 +NULL 5 1 +NULL NULL 1 diff --git ql/src/test/results/clientpositive/groupby_grouping_sets_limit.q.out ql/src/test/results/clientpositive/groupby_grouping_sets_limit.q.out index e2d9d96..997bb5a 100644 --- ql/src/test/results/clientpositive/groupby_grouping_sets_limit.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_sets_limit.q.out @@ -37,6 +37,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + grouping sets: 0, 1, 2, 3 keys: a (type: string), b (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -85,16 +86,16 @@ POSTHOOK: query: SELECT a, b, count(*) from T1 group by a, b with cube LIMIT 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 6 -NULL 1 2 -NULL 2 3 -NULL 3 1 -1 NULL 1 1 1 1 -2 NULL 2 +1 NULL 1 2 2 1 2 3 1 +2 NULL 2 3 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 PREHOOK: query: EXPLAIN SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()) LIMIT 10 PREHOOK: type: QUERY @@ -118,6 +119,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + grouping sets: 0, 1, 2, 3 keys: a (type: string), b (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -166,16 +168,16 @@ POSTHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 6 -NULL 1 2 -NULL 2 3 -NULL 3 1 -1 NULL 1 1 1 1 -2 NULL 2 +1 NULL 1 2 2 1 2 3 1 +2 NULL 2 3 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 PREHOOK: query: EXPLAIN SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)) LIMIT 10 PREHOOK: type: QUERY @@ -199,6 +201,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + grouping sets: 3, 1 keys: a (type: string), b (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -247,15 +250,15 @@ POSTHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, ( POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -1 NULL 1 1 1 1 -2 NULL 2 +1 NULL 1 2 2 1 2 3 1 -3 NULL 1 +2 NULL 2 3 2 1 -5 NULL 1 +3 NULL 1 5 2 1 +5 NULL 1 8 NULL 1 PREHOOK: query: EXPLAIN SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) LIMIT 10 @@ -279,6 +282,7 @@ STAGE PLANS: outputColumnNames: a, b, c Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 1, 2, 4 keys: a (type: string), b (type: string), c (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -325,6 +329,7 @@ POSTHOOK: query: SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) LIMIT POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### +1 NULL NULL NULL @@ -334,7 +339,6 @@ NULL NULL NULL NULL -1 PREHOOK: query: EXPLAIN SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)) LIMIT 10 PREHOOK: type: QUERY @@ -357,6 +361,7 @@ STAGE PLANS: outputColumnNames: a Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 1 keys: a (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, 
_col1 @@ -427,6 +432,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + grouping sets: 1 keys: _col0 (type: double), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 diff --git ql/src/test/results/clientpositive/groupby_grouping_window.q.out ql/src/test/results/clientpositive/groupby_grouping_window.q.out index 251f4f7..ae07e0c 100644 --- ql/src/test/results/clientpositive/groupby_grouping_window.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_window.q.out @@ -49,6 +49,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 20 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(live), max(comments) + grouping sets: 0, 1 keys: category (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 diff --git ql/src/test/results/clientpositive/groupby_rollup1.q.out ql/src/test/results/clientpositive/groupby_rollup1.q.out index 5fd011e..fd96e18 100644 --- ql/src/test/results/clientpositive/groupby_rollup1.q.out +++ ql/src/test/results/clientpositive/groupby_rollup1.q.out @@ -37,6 +37,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) + grouping sets: 0, 1, 3 keys: _col0 (type: string), _col1 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -116,6 +117,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT val) + grouping sets: 0, 1 keys: key (type: string), 0 (type: int), val (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -189,6 +191,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) + grouping sets: 0, 1, 3 keys: _col0 (type: string), _col1 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -293,6 +296,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT val) + grouping sets: 0, 1 keys: key (type: string), 0 (type: int), val (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -415,6 +419,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) + grouping sets: 0, 1, 3 keys: key (type: string), val (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -431,6 +436,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(1) + grouping sets: 0, 1, 3 keys: key (type: string), val (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 diff --git ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out index d740dea..1a890fe 100644 --- ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out +++ ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out @@ -38,6 +38,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) + grouping sets: 0, 1, 3 keys: _col0 (type: string), _col1 (type: string), 0 (type: int) mode: hash 
outputColumnNames: _col0, _col1, _col2, _col3 @@ -1470,6 +1471,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) + grouping sets: 0, 1, 2, 3 keys: _col0 (type: string), _col1 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -1648,6 +1650,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) + grouping sets: 1, 2 keys: _col0 (type: string), _col1 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 diff --git ql/src/test/results/clientpositive/limit_pushdown2.q.out ql/src/test/results/clientpositive/limit_pushdown2.q.out index cdd221b..689701b 100644 --- ql/src/test/results/clientpositive/limit_pushdown2.q.out +++ ql/src/test/results/clientpositive/limit_pushdown2.q.out @@ -937,6 +937,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(_col2) + grouping sets: 0, 1, 3 keys: _col0 (type: string), _col1 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -1026,6 +1027,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(_col2) + grouping sets: 0, 1, 3 keys: _col0 (type: string), _col1 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 diff --git ql/src/test/results/clientpositive/llap/multi_count_distinct_null.q.out ql/src/test/results/clientpositive/llap/multi_count_distinct_null.q.out index 0cbb9de..2d21201 100644 --- ql/src/test/results/clientpositive/llap/multi_count_distinct_null.q.out +++ ql/src/test/results/clientpositive/llap/multi_count_distinct_null.q.out @@ -46,6 +46,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 12 Data size: 66 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 1, 2, 4 keys: _col0 (type: int), _col1 (type: varchar(10)), _col2 (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -187,6 +188,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 12 Data size: 66 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 7, 1, 6, 2, 4 keys: _col0 (type: varchar(10)), _col1 (type: int), _col2 (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 diff --git ql/src/test/results/clientpositive/llap/vector_count.q.out ql/src/test/results/clientpositive/llap/vector_count.q.out index 9ef5c2b..7c71355 100644 --- ql/src/test/results/clientpositive/llap/vector_count.q.out +++ ql/src/test/results/clientpositive/llap/vector_count.q.out @@ -80,7 +80,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: bigint) - Execution mode: vectorized, llap + Execution mode: llap LLAP IO: all inputs Reducer 2 Execution mode: llap @@ -153,7 +153,7 @@ STAGE PLANS: sort order: ++++ Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE value expressions: _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: bigint), _col9 (type: bigint) - Execution mode: vectorized, llap + Execution mode: llap LLAP IO: all inputs Reducer 2 Execution mode: llap diff --git 
ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out new file mode 100644 index 0000000..8552139 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out @@ -0,0 +1,781 @@ +PREHOOK: query: CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1 +PREHOOK: query: EXPLAIN +SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + grouping sets: 0, 1, 2, 3 + keys: _col0 (type: string), _col1 (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT key, val, count(1) FROM T1 GROUP BY CUBE(key, val) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN 
+SELECT key, val, count(1) FROM T1 GROUP BY CUBE(key, val) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + grouping sets: 0, 1, 2, 3 + keys: _col0 (type: string), _col1 (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +1 11 1 +1 NULL 1 +2 12 1 +2 NULL 1 +3 13 1 +3 NULL 1 +7 17 1 +7 NULL 1 +8 18 1 +8 28 1 +8 NULL 2 +NULL 11 1 +NULL 12 1 +NULL 13 1 +NULL 17 1 +NULL 18 1 +NULL 28 1 +NULL NULL 6 +PREHOOK: query: EXPLAIN +SELECT key, val, GROUPING__ID, count(1) FROM T1 GROUP BY key, val with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT key, val, GROUPING__ID, count(1) FROM T1 GROUP BY key, val with cube +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), val (type: string) + 
outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + grouping sets: 0, 1, 2, 3 + keys: _col0 (type: string), _col1 (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key, val, GROUPING__ID, count(1) FROM T1 GROUP BY key, val with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, val, GROUPING__ID, count(1) FROM T1 GROUP BY key, val with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +1 11 3 1 +1 NULL 1 1 +2 12 3 1 +2 NULL 1 1 +3 13 3 1 +3 NULL 1 1 +7 17 3 1 +7 NULL 1 1 +8 18 3 1 +8 28 3 1 +8 NULL 1 2 +NULL 11 2 1 +NULL 12 2 1 +NULL 13 2 1 +NULL 17 2 1 +NULL 18 2 1 +NULL 28 2 1 +NULL NULL 0 6 +PREHOOK: query: EXPLAIN +SELECT key, count(distinct val) FROM T1 GROUP BY key with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT key, count(distinct val) FROM T1 GROUP BY key with cube +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: key, val + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT val) + grouping sets: 0, 1 + keys: key (type: string), 0 (type: int), val (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + sort 
order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col2:0._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col2 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key, count(distinct val) FROM T1 GROUP BY key with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, count(distinct val) FROM T1 GROUP BY key with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +1 1 +2 1 +3 1 +7 1 +8 2 +NULL 6 +PREHOOK: query: EXPLAIN +SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + grouping sets: 0, 1, 2, 3 + keys: _col0 (type: string), _col1 (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num 
rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +1 11 1 +1 NULL 1 +2 12 1 +2 NULL 1 +3 13 1 +3 NULL 1 +7 17 1 +7 NULL 1 +8 18 1 +8 28 1 +8 NULL 2 +NULL 11 1 +NULL 12 1 +NULL 13 1 +NULL 17 1 +NULL 18 1 +NULL 28 1 +NULL NULL 6 +PREHOOK: query: EXPLAIN +SELECT key, count(distinct val) FROM T1 GROUP BY key with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT key, count(distinct val) FROM T1 GROUP BY key with cube +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: key, val + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT val) + grouping sets: 0, 1 + keys: key (type: string), 0 (type: int), val (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col2:0._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 60 Basic 
stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: final + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col2 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key, count(distinct val) FROM T1 GROUP BY key with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, count(distinct val) FROM T1 GROUP BY key with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +1 1 +2 1 +3 1 +7 1 +8 2 +NULL 6 +PREHOOK: query: CREATE TABLE T2(key1 STRING, key2 STRING, val INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T2 +POSTHOOK: query: CREATE TABLE T2(key1 STRING, key2 STRING, val INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T2 +PREHOOK: query: CREATE TABLE T3(key1 STRING, key2 STRING, val INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T3 +POSTHOOK: query: CREATE TABLE T3(key1 STRING, key2 STRING, val INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T3 +PREHOOK: query: EXPLAIN +FROM T1 +INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with cube +INSERT OVERWRITE TABLE T3 SELECT key, val, sum(1) group by key, val with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM T1 +INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with cube +INSERT OVERWRITE TABLE T3 SELECT key, val, sum(1) group by key, val with cube +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: key, val + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + grouping sets: 0, 1, 2, 3 + keys: key (type: string), val (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, 
_col3 + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: key, val + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(1) + grouping sets: 0, 1, 2, 3 + keys: key (type: string), val (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), UDFToInteger(_col3) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t2 + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: 
sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), UDFToInteger(_col3) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t3 + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t2 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t3 + + Stage: Stage-5 + Stats-Aggr Operator + +PREHOOK: query: FROM T1 +INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with cube +INSERT OVERWRITE TABLE T3 SELECT key, val, sum(1) group by key, val with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Output: default@t2 +PREHOOK: Output: default@t3 +POSTHOOK: query: FROM T1 +INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with cube +INSERT OVERWRITE TABLE T3 SELECT key, val, sum(1) group by key, val with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Output: default@t2 +POSTHOOK: Output: default@t3 +POSTHOOK: Lineage: t2.key1 SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: t2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] +POSTHOOK: Lineage: t2.val EXPRESSION [(t1)t1.null, ] +POSTHOOK: Lineage: t3.key1 SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: t3.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] +POSTHOOK: Lineage: t3.val EXPRESSION [(t1)t1.null, ] diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out new file mode 100644 index 0000000..878c83f --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out @@ -0,0 +1,179 @@ +PREHOOK: query: CREATE TABLE T1_text(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1_text +POSTHOOK: query: CREATE TABLE T1_text(key STRING, val STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1_text +POSTHOOK: 
type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_text +PREHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@t1_text +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@t1_text +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +POSTHOOK: Lineage: t1.key SIMPLE [(t1_text)t1_text.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: t1.val SIMPLE [(t1_text)t1_text.FieldSchema(name:val, type:string, comment:null), ] +t1_text.key t1_text.val +PREHOOK: query: SELECT key, val, GROUPING__ID from T1 group by key, val with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, val, GROUPING__ID from T1 group by key, val with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key val grouping__id +1 11 3 +1 NULL 1 +2 12 3 +2 NULL 1 +3 13 3 +3 NULL 1 +7 17 3 +7 NULL 1 +8 18 3 +8 28 3 +8 NULL 1 +NULL 11 2 +NULL 12 2 +NULL 13 2 +NULL 17 2 +NULL 18 2 +NULL 28 2 +NULL NULL 0 +PREHOOK: query: SELECT key, val, GROUPING__ID from T1 group by cube(key, val) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, val, GROUPING__ID from T1 group by cube(key, val) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key val grouping__id +1 11 3 +1 NULL 1 +2 12 3 +2 NULL 1 +3 13 3 +3 NULL 1 +7 17 3 +7 NULL 1 +8 18 3 +8 28 3 +8 NULL 1 +NULL 11 2 +NULL 12 2 +NULL 13 2 +NULL 17 2 +NULL 18 2 +NULL 28 2 +NULL NULL 0 +PREHOOK: query: SELECT GROUPING__ID, key, val from T1 group by key, val with rollup +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT GROUPING__ID, key, val from T1 group by key, val with rollup +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +grouping__id key val +0 NULL NULL +1 1 NULL +1 2 NULL +1 3 NULL +1 7 NULL +1 8 NULL +3 1 11 +3 2 12 +3 3 13 +3 7 17 +3 8 18 +3 8 28 +PREHOOK: query: SELECT GROUPING__ID, key, val from T1 group by rollup (key, val) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT GROUPING__ID, key, val from T1 group by rollup (key, val) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +grouping__id key val +0 NULL NULL +1 1 NULL +1 2 NULL +1 3 NULL +1 7 NULL +1 8 NULL +3 1 11 +3 2 12 +3 3 13 +3 7 17 +3 8 18 +3 8 28 +PREHOOK: query: SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN "0" WHEN GROUPING__ID == 1 THEN "1" WHEN GROUPING__ID == 2 THEN "2" WHEN GROUPING__ID == 3 THEN "3" ELSE "nothing" END from T1 group by key, val with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN "0" WHEN GROUPING__ID == 1 THEN "1" WHEN GROUPING__ID == 2 THEN "2" WHEN GROUPING__ID == 3 THEN "3" ELSE "nothing" END from T1 group by key, val with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key val grouping__id _c3 +1 11 3 3 +1 NULL 1 1 +2 12 3 3 +2 NULL 1 1 +3 13 3 3 +3 NULL 1 1 +7 17 3 3 +7 NULL 1 1 +8 18 3 3 +8 28 3 3 +8 
NULL 1 1 +NULL 11 2 2 +NULL 12 2 2 +NULL 13 2 2 +NULL 17 2 2 +NULL 18 2 2 +NULL 28 2 2 +NULL NULL 0 0 +PREHOOK: query: SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN "0" WHEN GROUPING__ID == 1 THEN "1" WHEN GROUPING__ID == 2 THEN "2" WHEN GROUPING__ID == 3 THEN "3" ELSE "nothing" END from T1 group by cube(key, val) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN "0" WHEN GROUPING__ID == 1 THEN "1" WHEN GROUPING__ID == 2 THEN "2" WHEN GROUPING__ID == 3 THEN "3" ELSE "nothing" END from T1 group by cube(key, val) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key val grouping__id _c3 +1 11 3 3 +1 NULL 1 1 +2 12 3 3 +2 NULL 1 1 +3 13 3 3 +3 NULL 1 1 +7 17 3 3 +7 NULL 1 1 +8 18 3 3 +8 28 3 3 +8 NULL 1 1 +NULL 11 2 2 +NULL 12 2 2 +NULL 13 2 2 +NULL 17 2 2 +NULL 18 2 2 +NULL 28 2 2 +NULL NULL 0 0 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id2.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id2.q.out new file mode 100644 index 0000000..41c6883 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id2.q.out @@ -0,0 +1,359 @@ +PREHOOK: query: CREATE TABLE T1_text(key INT, value INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1_text +POSTHOOK: query: CREATE TABLE T1_text(key INT, value INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_text +PREHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@t1_text +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@t1_text +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +POSTHOOK: Lineage: t1.key SIMPLE [(t1_text)t1_text.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: t1.value SIMPLE [(t1_text)t1_text.FieldSchema(name:value, type:int, comment:null), ] +t1_text.key t1_text.value +PREHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value grouping__id _c3 +1 1 3 1 +1 NULL 1 2 +1 NULL 3 1 +2 2 3 1 +2 NULL 1 1 +3 3 3 1 +3 NULL 1 2 +3 NULL 3 1 +4 5 3 1 +4 NULL 1 1 +NULL NULL 0 6 +PREHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY ROLLUP (key, value) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY ROLLUP (key, value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A 
masked pattern was here #### +key value grouping__id _c3 +1 1 3 1 +1 NULL 1 2 +1 NULL 3 1 +2 2 3 1 +2 NULL 1 1 +3 3 3 1 +3 NULL 1 2 +3 NULL 3 1 +4 5 3 1 +4 NULL 1 1 +NULL NULL 0 6 +PREHOOK: query: SELECT GROUPING__ID, count(*) +FROM +( +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +) t +GROUP BY GROUPING__ID +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT GROUPING__ID, count(*) +FROM +( +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +) t +GROUP BY GROUPING__ID +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +grouping__id _c1 +0 1 +1 4 +3 6 +PREHOOK: query: SELECT GROUPING__ID, count(*) +FROM +( +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY ROLLUP(key, value) +) t +GROUP BY GROUPING__ID +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT GROUPING__ID, count(*) +FROM +( +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY ROLLUP(key, value) +) t +GROUP BY GROUPING__ID +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +grouping__id _c1 +0 1 +1 4 +3 6 +PREHOOK: query: SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY key,value WITH ROLLUP) t1 +JOIN +(SELECT GROUPING__ID FROM T1 GROUP BY key, value WITH ROLLUP) t2 +ON t1.GROUPING__ID = t2.GROUPING__ID +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY key,value WITH ROLLUP) t1 +JOIN +(SELECT GROUPING__ID FROM T1 GROUP BY key, value WITH ROLLUP) t2 +ON t1.GROUPING__ID = t2.GROUPING__ID +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +t1.grouping__id t2.grouping__id +0 0 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +PREHOOK: query: SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY ROLLUP(key,value)) t1 +JOIN +(SELECT GROUPING__ID FROM T1 GROUP BY ROLLUP(key, value)) t2 +ON t1.GROUPING__ID = t2.GROUPING__ID +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY ROLLUP(key,value)) t1 +JOIN +(SELECT GROUPING__ID FROM T1 GROUP BY ROLLUP(key, value)) t2 +ON t1.GROUPING__ID = t2.GROUPING__ID +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +t1.grouping__id t2.grouping__id +0 0 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +PREHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value grouping__id _c3 +1 1 3 1 +1 
NULL 1 2 +1 NULL 3 1 +2 2 3 1 +2 NULL 1 1 +3 3 3 1 +3 NULL 1 2 +3 NULL 3 1 +4 5 3 1 +4 NULL 1 1 +NULL NULL 0 6 +PREHOOK: query: SELECT GROUPING__ID, count(*) +FROM +( +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +) t +GROUP BY GROUPING__ID +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT GROUPING__ID, count(*) +FROM +( +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +) t +GROUP BY GROUPING__ID +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +grouping__id _c1 +0 1 +1 4 +3 6 +PREHOOK: query: SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY key,value WITH ROLLUP) t1 +JOIN +(SELECT GROUPING__ID FROM T1 GROUP BY key, value WITH ROLLUP) t2 +ON t1.GROUPING__ID = t2.GROUPING__ID +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY key,value WITH ROLLUP) t1 +JOIN +(SELECT GROUPING__ID FROM T1 GROUP BY key, value WITH ROLLUP) t2 +ON t1.GROUPING__ID = t2.GROUPING__ID +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +t1.grouping__id t2.grouping__id +0 0 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out new file mode 100644 index 0000000..fd29322 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out @@ -0,0 +1,234 @@ +PREHOOK: query: CREATE TABLE T1_text(key INT, value INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1_text +POSTHOOK: query: CREATE TABLE T1_text(key INT, value INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_text +PREHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@t1_text +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@t1_text +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +POSTHOOK: Lineage: t1.key SIMPLE [(t1_text)t1_text.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: t1.value SIMPLE [(t1_text)t1_text.FieldSchema(name:value, type:int, comment:null), ] +t1_text.key t1_text.value +PREHOOK: query: EXPLAIN +SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT key, value, GROUPING__ID, count(*) +FROM 
T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: key, value + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + grouping sets: 0, 1 + keys: key (type: int), value (type: int), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col2 = 1) (type: boolean) + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), 1 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), 1 (type: int) + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int), 1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 3 Data size: 20 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: int), _col1 (type: int), 1 (type: int), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 20 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 20 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value grouping__id _c3 +1 NULL 1 2 +2 NULL 1 1 +3 NULL 1 2 +4 NULL 1 1 +PREHOOK: query: EXPLAIN +SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 
(SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + grouping sets: 1, 0 + keys: _col0 (type: int), _col1 (type: int), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col2 = 1) (type: boolean) + Statistics: Num rows: 3 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), 1 (type: int), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 20 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 20 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value grouping__id _c3 +1 NULL 1 2 +2 NULL 1 1 +3 NULL 1 2 +4 NULL 1 1 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets1.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets1.q.out new file mode 100644 index 0000000..871b0e3 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets1.q.out @@ -0,0 +1,188 @@ +PREHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1_text +POSTHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1_text +PREHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/grouping_sets.txt' INTO TABLE T1_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_text +PREHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@t1_text +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@t1_text +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +POSTHOOK: Lineage: t1.a SIMPLE [(t1_text)t1_text.FieldSchema(name:a, type:string, comment:null), ] +POSTHOOK: Lineage: t1.b SIMPLE [(t1_text)t1_text.FieldSchema(name:b, type:string, comment:null), ] +POSTHOOK: Lineage: t1.c SIMPLE [(t1_text)t1_text.FieldSchema(name:c, type:string, comment:null), ] +t1_text.a t1_text.b t1_text.c +PREHOOK: query: SELECT * FROM T1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM T1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +t1.a t1.b t1.c +1 1 3 +2 2 4 +2 3 5 +3 2 8 +5 2 2 +8 1 1 +PREHOOK: query: SELECT a, b, count(*) from T1 group by a, b with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, count(*) from T1 group by a, b with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 +1 1 1 +1 NULL 1 +2 2 1 +2 3 1 +2 NULL 2 +3 2 1 +3 NULL 1 +5 2 1 +5 NULL 1 +8 1 1 +8 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 +PREHOOK: query: SELECT a, b, count(*) from T1 group by cube(a, b) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, count(*) from T1 group by cube(a, b) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 +1 1 1 +1 NULL 1 +2 2 1 +2 3 1 +2 NULL 2 +3 2 1 +3 NULL 1 +5 2 1 +5 NULL 1 +8 1 1 +8 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 +PREHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 +1 1 1 +1 NULL 1 +2 2 1 +2 3 1 +2 NULL 2 +3 2 1 +3 NULL 1 +5 2 1 +5 NULL 1 +8 1 1 +8 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 +PREHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 +1 1 1 +1 NULL 1 +2 2 1 +2 3 1 +2 NULL 2 +3 2 1 +3 NULL 1 +5 2 1 +5 NULL 1 +8 1 1 +8 NULL 1 +PREHOOK: query: SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@t1 +#### A masked pattern was here #### +a +1 +2 +3 +5 +8 +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +PREHOOK: query: SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a +1 +2 +3 +5 +8 +PREHOOK: query: SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +_c0 _c1 +2.0 1 +4.0 1 +5.0 2 +7.0 1 +9.0 1 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out new file mode 100644 index 0000000..2973aba --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out @@ -0,0 +1,473 @@ +PREHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1_text +POSTHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_text +PREHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@t1_text +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@t1_text +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +POSTHOOK: Lineage: t1.a SIMPLE [(t1_text)t1_text.FieldSchema(name:a, type:string, comment:null), ] +POSTHOOK: Lineage: t1.b SIMPLE [(t1_text)t1_text.FieldSchema(name:b, type:string, comment:null), ] +POSTHOOK: Lineage: t1.c SIMPLE [(t1_text)t1_text.FieldSchema(name:c, type:string, comment:null), ] +t1_text.a t1_text.b t1_text.c +PREHOOK: query: EXPLAIN +SELECT a, b, count(*) from T1 group by a, b with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, count(*) from T1 group by a, b with cube +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string) + outputColumnNames: a, b + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE 
Column stats: NONE + Group By Operator + aggregations: count() + keys: a (type: string), b (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + grouping sets: 0, 1, 2, 3 + keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT a, b, count(*) from T1 group by cube(a, b) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, count(*) from T1 group by cube(a, b) +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string) + outputColumnNames: a, b + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: a (type: string), b (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + 
Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + grouping sets: 0, 1, 2, 3 + keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b, count(*) from T1 group by a, b with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, count(*) from T1 group by a, b with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 +1 1 1 +1 NULL 1 +2 2 1 +2 3 1 +2 NULL 2 +3 2 1 +3 NULL 1 +5 2 1 +5 NULL 1 +8 1 1 +8 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 +PREHOOK: query: EXPLAIN +SELECT a, b, sum(c) from T1 group by a, b with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, sum(c) from T1 group by a, b with cube +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string), c (type: string) + outputColumnNames: a, b, c + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(c) + keys: a (type: string), b (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce 
partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: double) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + grouping sets: 0, 1, 2, 3 + keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: double) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b, sum(c) from T1 group by a, b with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, sum(c) from T1 group by a, b with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 +1 1 3.0 +1 NULL 3.0 +2 2 4.0 +2 3 5.0 +2 NULL 9.0 +3 2 8.0 +3 NULL 8.0 +5 2 2.0 +5 NULL 2.0 +8 1 1.0 +8 NULL 1.0 +NULL 1 4.0 +NULL 2 14.0 +NULL 3 5.0 +NULL NULL 23.0 +PREHOOK: query: CREATE TABLE T2(a STRING, b STRING, c int, d int) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T2 +POSTHOOK: query: CREATE TABLE T2(a STRING, b STRING, c int, d int) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T2 +PREHOOK: query: INSERT OVERWRITE TABLE T2 +SELECT a, b, c, c from T1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Output: default@t2 +POSTHOOK: query: INSERT OVERWRITE TABLE T2 +SELECT a, b, c, c from T1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Output: default@t2 +POSTHOOK: Lineage: t2.a SIMPLE [(t1)t1.FieldSchema(name:a, type:string, comment:null), ] +POSTHOOK: Lineage: t2.b SIMPLE [(t1)t1.FieldSchema(name:b, type:string, comment:null), ] +POSTHOOK: Lineage: t2.c EXPRESSION [(t1)t1.FieldSchema(name:c, type:string, comment:null), ] +POSTHOOK: Lineage: t2.d EXPRESSION [(t1)t1.FieldSchema(name:c, type:string, comment:null), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: EXPLAIN +SELECT a, b, sum(c+d) from T2 
group by a, b with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, sum(c+d) from T2 group by a, b with cube +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t2 + Statistics: Num rows: 6 Data size: 1068 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string), (c + d) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1068 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col2) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1068 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 6 Data size: 1068 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + grouping sets: 0, 1, 2, 3 + keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 12 Data size: 2136 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 2136 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 2136 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b, sum(c+d) from T2 group by a, b with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, sum(c+d) from T2 group by a, b with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +a b _c2 +1 1 6 +1 NULL 6 +2 2 8 +2 3 10 +2 NULL 18 +3 2 16 +3 NULL 16 +5 2 4 +5 NULL 4 +8 1 2 +8 NULL 2 +NULL 
1 8 +NULL 2 28 +NULL 3 10 +NULL NULL 46 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3.q.out new file mode 100644 index 0000000..15450e9 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3.q.out @@ -0,0 +1,317 @@ +PREHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1_text +POSTHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets1.txt' INTO TABLE T1_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets1.txt' INTO TABLE T1_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets2.txt' INTO TABLE T1_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets2.txt' INTO TABLE T1_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_text +PREHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@t1_text +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@t1_text +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +POSTHOOK: Lineage: t1.a SIMPLE [(t1_text)t1_text.FieldSchema(name:a, type:string, comment:null), ] +POSTHOOK: Lineage: t1.b SIMPLE [(t1_text)t1_text.FieldSchema(name:b, type:string, comment:null), ] +POSTHOOK: Lineage: t1.c SIMPLE [(t1_text)t1_text.FieldSchema(name:c, type:string, comment:null), ] +t1_text.a t1_text.b t1_text.c +PREHOOK: query: EXPLAIN +SELECT a, b, avg(c), count(*) from T1 group by a, b with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, avg(c), count(*) from T1 group by a, b with cube +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string), c (type: string) + outputColumnNames: a, b, c + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(c), count() + grouping sets: 0, 1, 2, 3 + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 48 Data size: 12240 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + 
sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 48 Data size: 12240 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: struct), _col4 (type: bigint) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3, _col4 + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: double), _col4 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT a, b, avg(c), count(*) from T1 group by cube(a, b) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, avg(c), count(*) from T1 group by cube(a, b) +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string), c (type: string) + outputColumnNames: a, b, c + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(c), count() + grouping sets: 0, 1, 2, 3 + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 48 Data size: 12240 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 48 Data size: 12240 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: struct), _col4 (type: bigint) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3, _col4 + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: double), _col4 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + File Output Operator + 
compressed: false + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b, avg(c), count(*) from T1 group by a, b with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, avg(c), count(*) from T1 group by a, b with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 _c3 +1 1 3.0 2 +1 2 2.0 1 +1 NULL 2.6666666666666665 3 +2 2 5.333333333333333 3 +2 3 5.0 2 +2 NULL 5.2 5 +3 2 8.0 1 +3 NULL 8.0 1 +5 1 2.0 1 +5 NULL 2.0 1 +8 1 1.0 2 +8 NULL 1.0 2 +NULL 1 2.0 5 +NULL 2 5.2 5 +NULL 3 5.0 2 +NULL NULL 3.8333333333333335 12 +PREHOOK: query: EXPLAIN +SELECT a, b, avg(c), count(*) from T1 group by a, b with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, avg(c), count(*) from T1 group by a, b with cube +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string), c (type: string) + outputColumnNames: a, b, c + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(c), count() + keys: a (type: string), b (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: bigint) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0), count(VALUE._col1) + grouping sets: 0, 1, 2, 3 + keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 48 Data size: 12240 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 48 Data size: 12240 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: struct), _col4 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col3, _col4 + Statistics: Num rows: 24 Data size: 6120 Basic stats: 
COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: double), _col4 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b, avg(c), count(*) from T1 group by a, b with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, avg(c), count(*) from T1 group by a, b with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 _c3 +1 1 3.0 2 +1 2 2.0 1 +1 NULL 2.6666666666666665 3 +2 2 5.333333333333333 3 +2 3 5.0 2 +2 NULL 5.2 5 +3 2 8.0 1 +3 NULL 8.0 1 +5 1 2.0 1 +5 NULL 2.0 1 +8 1 1.0 2 +8 NULL 1.0 2 +NULL 1 2.0 5 +NULL 2 5.2 5 +NULL 3 5.0 2 +NULL NULL 3.8333333333333335 12 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets4.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets4.q.out new file mode 100644 index 0000000..d3609a9 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets4.q.out @@ -0,0 +1,560 @@ +PREHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1_text +POSTHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_text +PREHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@t1_text +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@t1_text +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +POSTHOOK: Lineage: t1.a SIMPLE [(t1_text)t1_text.FieldSchema(name:a, type:string, comment:null), ] +POSTHOOK: Lineage: t1.b SIMPLE [(t1_text)t1_text.FieldSchema(name:b, type:string, comment:null), ] +POSTHOOK: Lineage: t1.c SIMPLE [(t1_text)t1_text.FieldSchema(name:c, type:string, comment:null), ] +t1_text.a t1_text.b t1_text.c +PREHOOK: query: EXPLAIN +SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 +on subq1.a = subq2.a +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * FROM +(SELECT a, b, count(*) from T1 where a 
< 3 group by a, b with cube) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 +on subq1.a = subq2.a +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(a) < 3.0) (type: boolean) + Statistics: Num rows: 2 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + grouping sets: 0, 1, 2, 3 + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 2040 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 8 Data size: 2040 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(a) < 3.0) (type: boolean) + Statistics: Num rows: 2 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + grouping sets: 0, 1, 2, 3 + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 2040 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 8 Data size: 2040 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join 
Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 1122 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 1122 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: bigint) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by cube(a, b) ) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by cube(a, b) ) subq2 +on subq1.a = subq2.a +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by cube(a, b) ) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by cube(a, b) ) subq2 +on subq1.a = subq2.a +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(a) < 3.0) (type: boolean) + Statistics: Num rows: 2 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + grouping sets: 0, 1, 2, 3 + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 2040 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 8 Data size: 2040 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: t1 + Statistics: 
Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(a) < 3.0) (type: boolean) + Statistics: Num rows: 2 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + grouping sets: 0, 1, 2, 3 + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 2040 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 8 Data size: 2040 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 1122 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 1122 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + 
value expressions: _col1 (type: string), _col2 (type: bigint) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 +on subq1.a = subq2.a +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 +on subq1.a = subq2.a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +subq1.a subq1.b subq1._c2 subq2.a subq2.b subq2._c2 +1 1 1 1 1 1 +1 1 1 1 NULL 1 +1 NULL 1 1 1 1 +1 NULL 1 1 NULL 1 +2 2 1 2 2 1 +2 2 1 2 3 1 +2 2 1 2 NULL 2 +2 3 1 2 2 1 +2 3 1 2 3 1 +2 3 1 2 NULL 2 +2 NULL 2 2 2 1 +2 NULL 2 2 3 1 +2 NULL 2 2 NULL 2 +PREHOOK: query: EXPLAIN +SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 +on subq1.a = subq2.a +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 +on subq1.a = subq2.a +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(a) < 3.0) (type: boolean) + Statistics: Num rows: 2 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: a (type: string), b (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 2 Data size: 510 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 5 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(a) < 3.0) (type: boolean) + Statistics: Num rows: 2 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: a (type: string), b (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 2 Data size: 510 Basic stats: COMPLETE Column stats: NONE + 
value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + grouping sets: 0, 1, 2, 3 + keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 2040 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 8 Data size: 2040 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: bigint) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 1122 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 1122 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + grouping sets: 0, 1, 2, 3 + keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 2040 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 8 Data size: 2040 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 7 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column 
stats: NONE + pruneGroupingSetId: true + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: bigint) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 +on subq1.a = subq2.a +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 +on subq1.a = subq2.a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +subq1.a subq1.b subq1._c2 subq2.a subq2.b subq2._c2 +1 1 1 1 1 1 +1 1 1 1 NULL 1 +1 NULL 1 1 1 1 +1 NULL 1 1 NULL 1 +2 2 1 2 2 1 +2 2 1 2 3 1 +2 2 1 2 NULL 2 +2 3 1 2 2 1 +2 3 1 2 3 1 +2 3 1 2 NULL 2 +2 NULL 2 2 2 1 +2 NULL 2 2 3 1 +2 NULL 2 2 NULL 2 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out new file mode 100644 index 0000000..e283444 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out @@ -0,0 +1,374 @@ +PREHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1_text +POSTHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_text +PREHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@t1_text +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@t1_text +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +POSTHOOK: Lineage: t1.a SIMPLE [(t1_text)t1_text.FieldSchema(name:a, type:string, comment:null), ] +POSTHOOK: Lineage: t1.b SIMPLE [(t1_text)t1_text.FieldSchema(name:b, type:string, comment:null), ] +POSTHOOK: Lineage: t1.c SIMPLE [(t1_text)t1_text.FieldSchema(name:c, type:string, comment:null), ] +t1_text.a t1_text.b t1_text.c +PREHOOK: query: EXPLAIN 
+SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string) + outputColumnNames: a, b + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: a (type: string), b (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + grouping sets: 0, 1, 2, 3 + keys: _col0 (type: string), _col1 (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by cube(a, b) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, count(*) FROM +(SELECT a, b, 
count(1) from T1 group by a, b) subq1 group by cube(a, b) +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string) + outputColumnNames: a, b + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: a (type: string), b (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + grouping sets: 0, 1, 2, 3 + keys: _col0 (type: string), _col1 (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here 
#### +a b _c2 +1 1 1 +1 NULL 1 +2 2 1 +2 3 1 +2 NULL 2 +3 2 1 +3 NULL 1 +5 2 1 +5 NULL 1 +8 1 1 +8 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 +PREHOOK: query: EXPLAIN +SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string) + outputColumnNames: a, b + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: a (type: string), b (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + grouping sets: 0, 1, 2, 3 + keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 4 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + 
expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 +1 1 1 +1 NULL 1 +2 2 1 +2 3 1 +2 NULL 2 +3 2 1 +3 NULL 1 +5 2 1 +5 NULL 1 +8 1 1 +8 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets6.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets6.q.out new file mode 100644 index 0000000..45f7eb3 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets6.q.out @@ -0,0 +1,194 @@ +PREHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1_text +POSTHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_text +PREHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@t1_text +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@t1_text +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +POSTHOOK: Lineage: t1.a SIMPLE [(t1_text)t1_text.FieldSchema(name:a, type:string, comment:null), ] +POSTHOOK: Lineage: t1.b SIMPLE [(t1_text)t1_text.FieldSchema(name:b, type:string, comment:null), ] +POSTHOOK: Lineage: t1.c SIMPLE [(t1_text)t1_text.FieldSchema(name:c, type:string, comment:null), ] +t1_text.a t1_text.b t1_text.c +PREHOOK: query: EXPLAIN +SELECT a, b FROM +(SELECT a, b from T1 group by a, b grouping sets ( (a,b),a )) res +WHERE res.a=5 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b FROM +(SELECT a, b from T1 group by a, b grouping sets ( (a,b),a )) res +WHERE res.a=5 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here 
#### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(a) = 5.0) (type: boolean) + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + Group By Operator + grouping sets: 3, 1 + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b FROM +(SELECT a, b from T1 group by a, b grouping sets ( (a,b),a )) res +WHERE res.a=5 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b FROM +(SELECT a, b from T1 group by a, b grouping sets ( (a,b),a )) res +WHERE res.a=5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b +5 2 +5 NULL +PREHOOK: query: EXPLAIN +SELECT a, b FROM +(SELECT a, b from T1 group by a, b grouping sets ( (a,b),a )) res +WHERE res.a=5 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b FROM +(SELECT a, b from T1 group by a, b grouping sets ( (a,b),a )) res +WHERE res.a=5 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(a) = 5.0) (type: boolean) + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + Group By Operator + grouping sets: 3, 1 + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Execution 
mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b FROM +(SELECT a, b from T1 group by a, b grouping sets ( (a,b),a )) res +WHERE res.a=5 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b FROM +(SELECT a, b from T1 group by a, b grouping sets ( (a,b),a )) res +WHERE res.a=5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b +5 2 +5 NULL diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out new file mode 100644 index 0000000..c026f3f --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out @@ -0,0 +1,866 @@ +PREHOOK: query: CREATE TABLE T1_text(key INT, value INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1_text +POSTHOOK: query: CREATE TABLE T1_text(key INT, value INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_text +PREHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@t1_text +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@t1_text +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +POSTHOOK: Lineage: t1.key SIMPLE [(t1_text)t1_text.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: t1.value SIMPLE [(t1_text)t1_text.FieldSchema(name:value, type:int, comment:null), ] +t1_text.key t1_text.value +PREHOOK: query: explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value) +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: 
+ TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Group By Operator + grouping sets: 0, 1, 3 + keys: _col0 (type: int), _col1 (type: int), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 18 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 18 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 9 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: tinyint), grouping(_col2, 0) (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 9 Data size: 60 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 9 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value grouping__id _c3 _c4 +1 1 3 1 1 +1 NULL 1 0 1 +1 NULL 3 1 1 +2 2 3 1 1 +2 NULL 1 0 1 +3 3 3 1 1 +3 NULL 1 0 1 +3 NULL 3 1 1 +4 5 3 1 1 +4 NULL 1 0 1 +NULL NULL 0 0 0 +PREHOOK: query: explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value) +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Group By Operator + grouping sets: 0, 1, 2, 3 + keys: _col0 (type: int), _col1 (type: int), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num 
rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: tinyint), grouping(_col2, 0) (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value grouping__id _c3 _c4 +1 1 3 1 1 +1 NULL 1 0 1 +1 NULL 3 1 1 +2 2 3 1 1 +2 NULL 1 0 1 +3 3 3 1 1 +3 NULL 1 0 1 +3 NULL 3 1 1 +4 5 3 1 1 +4 NULL 1 0 1 +NULL 1 2 1 0 +NULL 2 2 1 0 +NULL 3 2 1 0 +NULL 5 2 1 0 +NULL NULL 0 0 0 +NULL NULL 2 1 0 +PREHOOK: query: explain +select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Group By Operator + grouping sets: 0, 1, 2, 3 + keys: _col0 (type: int), _col1 (type: int), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution 
mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToInteger(grouping(_col2, 1)) = 1) (type: boolean) + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value +1 1 +1 NULL +2 2 +3 3 +3 NULL +4 5 +NULL 1 +NULL 2 +NULL 3 +NULL 5 +NULL NULL +PREHOOK: query: explain +select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Group By Operator + grouping sets: 0, 1, 2, 3 + keys: _col0 (type: int), _col1 (type: int), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: 
((UDFToInteger(grouping(_col2, 1)) = 1) or (UDFToInteger(grouping(_col2, 0)) = 1)) (type: boolean) + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), (grouping(_col2, 1) + grouping(_col2, 0)) (type: tinyint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: tinyint), CASE WHEN ((_col2 = 1)) THEN (_col0) ELSE (null) END (type: int) + sort order: -+ + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey0 (type: tinyint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value x +1 1 2 +1 NULL 1 +1 NULL 2 +2 2 2 +2 NULL 1 +3 3 2 +3 NULL 1 +3 NULL 2 +4 5 2 +4 NULL 1 +NULL 1 1 +NULL 2 1 +NULL 3 1 +NULL 5 1 +NULL NULL 1 +PREHOOK: query: explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value) +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: key, value + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Group By Operator + grouping sets: 0, 1, 3 + keys: key (type: int), value (type: int), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 18 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 18 Data size: 120 
Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 9 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: tinyint), grouping(_col2, 0) (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 9 Data size: 60 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 9 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value grouping__id _c3 _c4 +1 1 3 1 1 +1 NULL 1 0 1 +1 NULL 3 1 1 +2 2 3 1 1 +2 NULL 1 0 1 +3 3 3 1 1 +3 NULL 1 0 1 +3 NULL 3 1 1 +4 5 3 1 1 +4 NULL 1 0 1 +NULL NULL 0 0 0 +PREHOOK: query: explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value) +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: key, value + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Group By Operator + grouping sets: 0, 1, 2, 3 + keys: key (type: int), value (type: int), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 
(type: int), _col2 (type: int), grouping(_col2, 1) (type: tinyint), grouping(_col2, 0) (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value grouping__id _c3 _c4 +1 1 3 1 1 +1 NULL 1 0 1 +1 NULL 3 1 1 +2 2 3 1 1 +2 NULL 1 0 1 +3 3 3 1 1 +3 NULL 1 0 1 +3 NULL 3 1 1 +4 5 3 1 1 +4 NULL 1 0 1 +NULL 1 2 1 0 +NULL 2 2 1 0 +NULL 3 2 1 0 +NULL 5 2 1 0 +NULL NULL 0 0 0 +NULL NULL 2 1 0 +PREHOOK: query: explain +select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: key, value + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Group By Operator + grouping sets: 0, 1, 2, 3 + keys: key (type: int), value (type: int), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (grouping(_col2, 1) = 1) (type: boolean) + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value +1 1 +1 NULL +2 2 +3 3 +3 NULL +4 5 +NULL 1 +NULL 2 +NULL 3 +NULL 5 +NULL NULL +PREHOOK: query: explain +select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: key, value + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Group By Operator + grouping sets: 0, 1, 2, 3 + keys: key (type: int), value (type: int), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((grouping(_col2, 1) = 1) or (grouping(_col2, 0) = 1)) (type: boolean) + Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), (grouping(_col2, 1) + grouping(_col2, 0)) (type: tinyint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: tinyint), CASE WHEN ((_col2 = 1)) THEN (_col0) END (type: int) + sort order: -+ + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey0 (type: tinyint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data 
size: 80 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value x +1 1 2 +1 NULL 1 +1 NULL 2 +2 2 2 +2 NULL 1 +3 3 2 +3 NULL 1 +3 NULL 2 +4 5 2 +4 NULL 1 +NULL 1 1 +NULL 2 1 +NULL 3 1 +NULL 5 1 +NULL NULL 1 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out new file mode 100644 index 0000000..154ce88 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out @@ -0,0 +1,574 @@ +PREHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1_text +POSTHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_text +PREHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@t1_text +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@t1_text +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +POSTHOOK: Lineage: t1.a SIMPLE [(t1_text)t1_text.FieldSchema(name:a, type:string, comment:null), ] +POSTHOOK: Lineage: t1.b SIMPLE [(t1_text)t1_text.FieldSchema(name:b, type:string, comment:null), ] +POSTHOOK: Lineage: t1.c SIMPLE [(t1_text)t1_text.FieldSchema(name:c, type:string, comment:null), ] +t1_text.a t1_text.b t1_text.c +PREHOOK: query: EXPLAIN +SELECT a, b, count(*) from T1 group by a, b with cube LIMIT 10 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, count(*) from T1 group by a, b with cube LIMIT 10 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 
(SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string) + outputColumnNames: a, b + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + grouping sets: 0, 1, 2, 3 + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col3 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2550 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 2550 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b, count(*) from T1 group by a, b with cube LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, count(*) from T1 group by a, b with cube LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 +1 1 1 +1 NULL 1 +2 2 1 +2 3 1 +2 NULL 2 +3 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 +PREHOOK: query: EXPLAIN +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()) LIMIT 10 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()) LIMIT 10 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string) + outputColumnNames: a, b + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + grouping 
sets: 0, 1, 2, 3 + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col3 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2550 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 2550 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()) LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()) LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 +1 1 1 +1 NULL 1 +2 2 1 +2 3 1 +2 NULL 2 +3 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 +PREHOOK: query: EXPLAIN +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)) LIMIT 10 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)) LIMIT 10 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string) + outputColumnNames: a, b + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + grouping sets: 3, 1 + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 
(type: string), _col2 (type: int) + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col3 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)) LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)) LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 +1 1 1 +1 NULL 1 +2 2 1 +2 3 1 +2 NULL 2 +3 2 1 +3 NULL 1 +5 2 1 +5 NULL 1 +8 NULL 1 +PREHOOK: query: EXPLAIN +SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) LIMIT 10 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) LIMIT 10 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string), c (type: string) + outputColumnNames: a, b, c + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator + grouping sets: 1, 2, 4 + keys: a (type: string), b (type: string), c (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 18 Data size: 4590 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int) + sort order: ++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int) + Statistics: Num rows: 18 Data size: 4590 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 
(type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 9 Data size: 2295 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 9 Data size: 2295 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 9 Data size: 2295 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 9 Data size: 2295 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a +1 +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +PREHOOK: query: EXPLAIN +SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)) LIMIT 10 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)) LIMIT 10 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string) + outputColumnNames: a + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator + grouping sets: 1 + keys: a (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Limit + Number of rows: 10 + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)) LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: 
default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)) LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a +1 +2 +3 +5 +8 +PREHOOK: query: EXPLAIN +SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b) LIMIT 10 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b) LIMIT 10 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (UDFToDouble(a) + UDFToDouble(b)) (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + grouping sets: 1 + keys: _col0 (type: double), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: double), _col1 (type: int) + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: double), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col2 + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: double), _col2 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b) LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b) LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +_c0 _c1 +2.0 1 +4.0 1 +5.0 2 +7.0 1 +9.0 1 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out new file mode 100644 index 0000000..333f071 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out @@ -0,0 +1,158 @@ +PREHOOK: query: create table t(category int, live int, 
comments int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t +POSTHOOK: query: create table t(category int, live int, comments int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t +PREHOOK: query: insert into table t select key, 0, 2 from src tablesample(3 rows) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@t +POSTHOOK: query: insert into table t select key, 0, 2 from src tablesample(3 rows) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.category EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: t.comments SIMPLE [] +POSTHOOK: Lineage: t.live SIMPLE [] +_col0 _col1 _col2 +PREHOOK: query: explain +select category, max(live) live, max(comments) comments, rank() OVER (PARTITION BY category ORDER BY comments) rank1 +FROM t +GROUP BY category +GROUPING SETS ((), (category)) +HAVING max(comments) > 0 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select category, max(live) live, max(comments) comments, rank() OVER (PARTITION BY category ORDER BY comments) rank1 +FROM t +GROUP BY category +GROUPING SETS ((), (category)) +HAVING max(comments) > 0 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: category (type: int), live (type: int), comments (type: int) + outputColumnNames: category, live, comments + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(live), max(comments) + grouping sets: 0, 1 + keys: category (type: int), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0), max(VALUE._col1) + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Filter Operator + predicate: (_col3 > 0) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col3 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col1 (type: int), 
KEY.reducesinkkey1 (type: int) + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: int, _col2: int, _col3: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col3 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col3 + name: rank + window function: GenericUDAFRankEvaluator + window frame: PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), rank_window_0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select category, max(live) live, max(comments) comments, rank() OVER (PARTITION BY category ORDER BY comments) rank1 +FROM t +GROUP BY category +GROUPING SETS ((), (category)) +HAVING max(comments) > 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: select category, max(live) live, max(comments) comments, rank() OVER (PARTITION BY category ORDER BY comments) rank1 +FROM t +GROUP BY category +GROUPING SETS ((), (category)) +HAVING max(comments) > 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +category live comments rank1 +NULL 0 2 1 +86 0 2 1 +238 0 2 1 +311 0 2 1 diff --git ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out index 77a0695..299c164 100644 --- ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out +++ ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out @@ -155,6 +155,7 @@ STAGE PLANS: outputColumnNames: s_store_id Statistics: Num rows: 12 Data size: 25632 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 0, 1 keys: s_store_id (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1 @@ -164,10 +165,10 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 24 Data size: 51264 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: int) @@ -240,6 +241,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 12 Data size: 25632 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 0, 1 keys: _col0 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1 @@ -249,7 +251,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 24 Data size: 
51264 Basic stats: COMPLETE Column stats: NONE
- Execution mode: llap
+ Execution mode: vectorized, llap
 LLAP IO: all inputs
 Reducer 2
 Execution mode: vectorized, llap
@@ -328,6 +330,7 @@ STAGE PLANS:
 outputColumnNames: _col0
 Statistics: Num rows: 12 Data size: 25632 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
+ grouping sets: 0, 1
 keys: _col0 (type: string), 0 (type: int)
 mode: hash
 outputColumnNames: _col0, _col1
@@ -337,7 +340,7 @@ STAGE PLANS:
 sort order: ++
 Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
 Statistics: Num rows: 24 Data size: 51264 Basic stats: COMPLETE Column stats: NONE
- Execution mode: llap
+ Execution mode: vectorized, llap
 LLAP IO: all inputs
 Reducer 2
 Execution mode: vectorized, llap
diff --git ql/src/test/results/clientpositive/llap/vectorization_15.q.out ql/src/test/results/clientpositive/llap/vectorization_15.q.out
index 04cd902..7fdb8e9 100644
--- ql/src/test/results/clientpositive/llap/vectorization_15.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_15.q.out
@@ -215,15 +215,15 @@ POSTHOOK: Input: default@alltypesorc
 -51.0 true NULL QiOcvR0kt6r7f0R7fiPxQTCU -51 266531954 1969-12-31 16:00:08.451 0.0 -266531980.28 NULL NULL 33.0 0.0 0.0 NULL 51 0.0 2.66532E8 -23 266531980.28 0.0
 -51.0 true NULL Ybpj38RTTYl7CnJXPNx1g4C -51 -370919370 1969-12-31 16:00:08.451 0.0 370919343.72 NULL NULL 33.0 0.0 0.0 NULL 51 0.0 -3.70919296E8 -23 -370919343.72 0.0
 -6.0 NULL -200.0 NULL -6 NULL 1969-12-31 15:59:56.094 0.0 NULL -200.0 -15910.599999999999 3.0 0.0 0.0 -23.0 6 NULL NULL -5 NULL NULL
--62.0 NULL 15601.0 NULL -62 NULL 1969-12-31 16:00:09.889 0.0 NULL 15601.0 1241106.353 33.0 0.0 0.0 -23.0 62 NULL NULL -23 NULL NULL
+-62.0 NULL 15601.0 NULL -62 NULL 1969-12-31 15:59:56.527 0.0 NULL 15601.0 1241106.353 33.0 0.0 0.0 -23.0 62 NULL NULL -23 NULL NULL
 11.0 false NULL 10pO8p1LNx4Y 11 271296824 1969-12-31 16:00:02.351 0.0 -271296850.28 NULL NULL 0.0 0.0 0.0 NULL -11 0.0 2.71296832E8 -1 271296850.28 0.0
 11.0 false NULL 1H6wGP 11 -560827082 1969-12-31 16:00:02.351 0.0 560827055.72 NULL NULL 0.0 0.0 0.0 NULL -11 0.0 -5.6082707E8 -1 -560827055.72 0.0
 11.0 false NULL 2a7V63IL7jK3o 11 -325931647 1969-12-31 16:00:02.351 0.0 325931620.72 NULL NULL 0.0 0.0 0.0 NULL -11 0.0 -3.25931648E8 -1 -325931620.72 0.0
 11.0 true NULL 10 11 92365813 1969-12-31 16:00:02.351 0.0 -92365839.28 NULL NULL 0.0 0.0 0.0 NULL -11 0.0 9.2365808E7 -1 92365839.28 0.0
-21.0 NULL 15601.0 NULL 21 NULL 1969-12-31 16:00:14.256 0.0 NULL 15601.0 1241106.353 12.0 0.0 0.0 -23.0 -21 NULL NULL -2 NULL NULL
+21.0 NULL 15601.0 NULL 21 NULL 1969-12-31 15:59:56.527 0.0 NULL 15601.0 1241106.353 12.0 0.0 0.0 -23.0 -21 NULL NULL -2 NULL NULL
 32.0 NULL -200.0 NULL 32 NULL 1969-12-31 16:00:02.445 0.0 NULL -200.0 -15910.599999999999 1.0 0.0 0.0 -23.0 -32 NULL NULL -23 NULL NULL
 36.0 NULL -200.0 NULL 36 NULL 1969-12-31 16:00:00.554 0.0 NULL -200.0 -15910.599999999999 33.0 0.0 0.0 -23.0 -36 NULL NULL -23 NULL NULL
-5.0 NULL 15601.0 NULL 5 NULL 1969-12-31 16:00:00.959 0.0 NULL 15601.0 1241106.353 3.0 0.0 0.0 -23.0 -5 NULL NULL -3 NULL NULL
+5.0 NULL 15601.0 NULL 5 NULL 1969-12-31 15:59:56.527 0.0 NULL 15601.0 1241106.353 3.0 0.0 0.0 -23.0 -5 NULL NULL -3 NULL NULL
 58.0 NULL 15601.0 NULL 58 NULL 1969-12-31 15:59:56.527 0.0 NULL 15601.0 1241106.353 33.0 0.0 0.0 -23.0 -58 NULL NULL -23 NULL NULL
 8.0 false NULL 10V3pN5r5lI2qWl2lG103 8 -362835731 1969-12-31 16:00:15.892 0.0 362835704.72 NULL NULL 1.0 0.0 0.0 NULL -8 0.0 -3.62835744E8 -7 -362835704.72 0.0
 8.0 false NULL 10c4qt584m5y6uWT 8 -183000142 1969-12-31 16:00:15.892 0.0 183000115.72 NULL NULL 1.0 0.0 0.0 NULL -8 0.0 -1.8300016E8 -7 -183000115.72 0.0
diff --git ql/src/test/results/clientpositive/perf/query23.q.out ql/src/test/results/clientpositive/perf/query23.q.out
index c8f3b85..ac53c2f 100644
--- ql/src/test/results/clientpositive/perf/query23.q.out
+++ ql/src/test/results/clientpositive/perf/query23.q.out
@@ -1,7 +1,7 @@
-Warning: Shuffle Join MERGEJOIN[379][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 19' is a cross product
-Warning: Shuffle Join MERGEJOIN[380][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 20' is a cross product
 Warning: Shuffle Join MERGEJOIN[382][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 51' is a cross product
 Warning: Shuffle Join MERGEJOIN[383][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 52' is a cross product
+Warning: Shuffle Join MERGEJOIN[379][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 19' is a cross product
+Warning: Shuffle Join MERGEJOIN[380][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 20' is a cross product
 PREHOOK: query: explain
 with frequent_ss_items as
 (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt
 from store_sales
diff --git ql/src/test/results/clientpositive/spark/groupby_cube1.q.out ql/src/test/results/clientpositive/spark/groupby_cube1.q.out
index 56b8c2f..13ae75a 100644
--- ql/src/test/results/clientpositive/spark/groupby_cube1.q.out
+++ ql/src/test/results/clientpositive/spark/groupby_cube1.q.out
@@ -42,6 +42,7 @@ STAGE PLANS:
 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
 aggregations: count(1)
+ grouping sets: 0, 1, 2, 3
 keys: _col0 (type: string), _col1 (type: string), 0 (type: int)
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3
@@ -107,6 +108,7 @@ STAGE PLANS:
 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
 aggregations: count(1)
+ grouping sets: 0, 1, 2, 3
 keys: _col0 (type: string), _col1 (type: string), 0 (type: int)
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3
@@ -198,6 +200,7 @@ STAGE PLANS:
 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
 aggregations: count(1)
+ grouping sets: 0, 1, 2, 3
 keys: _col0 (type: string), _col1 (type: string), 0 (type: int)
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3
@@ -288,6 +291,7 @@ STAGE PLANS:
 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
 aggregations: count(DISTINCT val)
+ grouping sets: 0, 1
 keys: key (type: string), 0 (type: int), val (type: string)
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3
@@ -367,6 +371,7 @@ STAGE PLANS:
 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
 aggregations: count(1)
+ grouping sets: 0, 1, 2, 3
 keys: _col0 (type: string), _col1 (type: string), 0 (type: int)
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3
@@ -473,6 +478,7 @@ STAGE PLANS:
 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
 aggregations: count(DISTINCT val)
+ grouping sets: 0, 1
 keys: key (type: string), 0 (type: int), val (type: string)
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3
@@ -591,6 +597,7 @@ STAGE PLANS:
 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
 aggregations: count(1)
+ grouping sets: 0, 1, 2, 3
 keys: key (type: string), val (type: string), 0 (type: int)
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3
@@ -612,6 +619,7 @@ STAGE PLANS:
 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
 aggregations: sum(1)
+ grouping sets: 0, 1, 2, 3
 keys: key (type: string), val (type: string), 0 (type: int)
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3
diff --git ql/src/test/results/clientpositive/spark/groupby_rollup1.q.out ql/src/test/results/clientpositive/spark/groupby_rollup1.q.out
index 6d087b2..6832a83 100644
--- ql/src/test/results/clientpositive/spark/groupby_rollup1.q.out
+++ ql/src/test/results/clientpositive/spark/groupby_rollup1.q.out
@@ -42,6 +42,7 @@ STAGE PLANS:
 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
 aggregations: count(1)
+ grouping sets: 0, 1, 3
 keys: _col0 (type: string), _col1 (type: string), 0 (type: int)
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3
@@ -127,6 +128,7 @@ STAGE PLANS:
 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
 aggregations: count(DISTINCT val)
+ grouping sets: 0, 1
 keys: key (type: string), 0 (type: int), val (type: string)
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3
@@ -206,6 +208,7 @@ STAGE PLANS:
 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
 aggregations: count(1)
+ grouping sets: 0, 1, 3
 keys: _col0 (type: string), _col1 (type: string), 0 (type: int)
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3
@@ -306,6 +309,7 @@ STAGE PLANS:
 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
 aggregations: count(DISTINCT val)
+ grouping sets: 0, 1
 keys: key (type: string), 0 (type: int), val (type: string)
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3
@@ -424,6 +428,7 @@ STAGE PLANS:
 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
 aggregations: count(1)
+ grouping sets: 0, 1, 3
 keys: key (type: string), val (type: string), 0 (type: int)
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3
@@ -445,6 +450,7 @@ STAGE PLANS:
 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
 aggregations: sum(1)
+ grouping sets: 0, 1, 3
 keys: key (type: string), val (type: string), 0 (type: int)
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3
diff --git ql/src/test/results/clientpositive/spark/limit_pushdown2.q.out ql/src/test/results/clientpositive/spark/limit_pushdown2.q.out
index f6f0043..d925f4d 100644
--- ql/src/test/results/clientpositive/spark/limit_pushdown2.q.out
+++ ql/src/test/results/clientpositive/spark/limit_pushdown2.q.out
@@ -966,6 +966,7 @@ STAGE PLANS:
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
 aggregations: avg(_col2)
+ grouping sets: 0, 1, 3
 keys: _col0 (type: string), _col1 (type: string), 0 (type: int)
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3
@@ -1051,6 +1052,7 @@ STAGE PLANS:
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
 aggregations: avg(_col2)
+ grouping sets: 0, 1, 3
 keys: _col0 (type: string), _col1 (type: string), 0 (type: int)
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3
diff --git ql/src/test/results/clientpositive/spark/vectorization_15.q.out ql/src/test/results/clientpositive/spark/vectorization_15.q.out
index 42d888f..d8c3bac 100644
--- ql/src/test/results/clientpositive/spark/vectorization_15.q.out
+++ ql/src/test/results/clientpositive/spark/vectorization_15.q.out
@@ -212,15 +212,15 @@ POSTHOOK: Input: default@alltypesorc
 -51.0 true NULL QiOcvR0kt6r7f0R7fiPxQTCU -51 266531954 1969-12-31 16:00:08.451 0.0 -266531980.28 NULL NULL 33.0 0.0 0.0 NULL 51 0.0 2.66532E8 -23 266531980.28 0.0
 -51.0 true NULL Ybpj38RTTYl7CnJXPNx1g4C -51 -370919370 1969-12-31 16:00:08.451 0.0 370919343.72 NULL NULL 33.0 0.0 0.0 NULL 51 0.0 -3.70919296E8 -23 -370919343.72 0.0
 -6.0 NULL -200.0 NULL -6 NULL 1969-12-31 15:59:56.094 0.0 NULL -200.0 -15910.599999999999 3.0 0.0 0.0 -23.0 6 NULL NULL -5 NULL NULL
--62.0 NULL 15601.0 NULL -62 NULL 1969-12-31 16:00:09.889 0.0 NULL 15601.0 1241106.353 33.0 0.0 0.0 -23.0 62 NULL NULL -23 NULL NULL
+-62.0 NULL 15601.0 NULL -62 NULL 1969-12-31 15:59:56.527 0.0 NULL 15601.0 1241106.353 33.0 0.0 0.0 -23.0 62 NULL NULL -23 NULL NULL
 11.0 false NULL 10pO8p1LNx4Y 11 271296824 1969-12-31 16:00:02.351 0.0 -271296850.28 NULL NULL 0.0 0.0 0.0 NULL -11 0.0 2.71296832E8 -1 271296850.28 0.0
 11.0 false NULL 1H6wGP 11 -560827082 1969-12-31 16:00:02.351 0.0 560827055.72 NULL NULL 0.0 0.0 0.0 NULL -11 0.0 -5.6082707E8 -1 -560827055.72 0.0
 11.0 false NULL 2a7V63IL7jK3o 11 -325931647 1969-12-31 16:00:02.351 0.0 325931620.72 NULL NULL 0.0 0.0 0.0 NULL -11 0.0 -3.25931648E8 -1 -325931620.72 0.0
 11.0 true NULL 10 11 92365813 1969-12-31 16:00:02.351 0.0 -92365839.28 NULL NULL 0.0 0.0 0.0 NULL -11 0.0 9.2365808E7 -1 92365839.28 0.0
-21.0 NULL 15601.0 NULL 21 NULL 1969-12-31 16:00:14.256 0.0 NULL 15601.0 1241106.353 12.0 0.0 0.0 -23.0 -21 NULL NULL -2 NULL NULL
+21.0 NULL 15601.0 NULL 21 NULL 1969-12-31 15:59:56.527 0.0 NULL 15601.0 1241106.353 12.0 0.0 0.0 -23.0 -21 NULL NULL -2 NULL NULL
 32.0 NULL -200.0 NULL 32 NULL 1969-12-31 16:00:02.445 0.0 NULL -200.0 -15910.599999999999 1.0 0.0 0.0 -23.0 -32 NULL NULL -23 NULL NULL
 36.0 NULL -200.0 NULL 36 NULL 1969-12-31 16:00:00.554 0.0 NULL -200.0 -15910.599999999999 33.0 0.0 0.0 -23.0 -36 NULL NULL -23 NULL NULL
-5.0 NULL 15601.0 NULL 5 NULL 1969-12-31 16:00:00.959 0.0 NULL 15601.0 1241106.353 3.0 0.0 0.0 -23.0 -5 NULL NULL -3 NULL NULL
+5.0 NULL 15601.0 NULL 5 NULL 1969-12-31 15:59:56.527 0.0 NULL 15601.0 1241106.353 3.0 0.0 0.0 -23.0 -5 NULL NULL -3 NULL NULL
 58.0 NULL 15601.0 NULL 58 NULL 1969-12-31 15:59:56.527 0.0 NULL 15601.0 1241106.353 33.0 0.0 0.0 -23.0 -58 NULL NULL -23 NULL NULL
 8.0 false NULL 10V3pN5r5lI2qWl2lG103 8 -362835731 1969-12-31 16:00:15.892 0.0 362835704.72 NULL NULL 1.0 0.0 0.0 NULL -8 0.0 -3.62835744E8 -7 -362835704.72 0.0
 8.0 false NULL 10c4qt584m5y6uWT 8 -183000142 1969-12-31 16:00:15.892 0.0 183000115.72 NULL NULL 1.0 0.0 0.0 NULL -8 0.0 -1.8300016E8 -7 -183000115.72 0.0
diff --git ql/src/test/results/clientpositive/vector_count.q.out ql/src/test/results/clientpositive/vector_count.q.out
index e829ad8..35e0c9d 100644
--- ql/src/test/results/clientpositive/vector_count.q.out
+++ ql/src/test/results/clientpositive/vector_count.q.out
@@ -74,7 +74,6 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: int)
 Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col5 (type: bigint)
- Execution mode: vectorized
 Reduce Operator Tree:
 Group By Operator
 aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0), sum(VALUE._col2)
@@ -138,7 +137,6 @@ STAGE PLANS:
 sort order: ++++
 Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: bigint), _col9 (type: bigint)
- Execution mode: vectorized
 Reduce Operator Tree:
 Group By Operator
 aggregations: count(VALUE._col0), count(VALUE._col1), count(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), count(DISTINCT KEY._col0:2._col0), count(DISTINCT KEY._col0:3._col0), count(DISTINCT KEY._col0:4._col0, KEY._col0:4._col1), count(DISTINCT KEY._col0:5._col0, KEY._col0:5._col1), count(DISTINCT KEY._col0:6._col0, KEY._col0:6._col1), count(DISTINCT KEY._col0:7._col0, KEY._col0:7._col1), count(DISTINCT KEY._col0:8._col0, KEY._col0:8._col1), count(DISTINCT KEY._col0:9._col0, KEY._col0:9._col1), count(DISTINCT KEY._col0:10._col0, KEY._col0:10._col1, KEY._col0:10._col2), count(DISTINCT KEY._col0:11._col0, KEY._col0:11._col1, KEY._col0:11._col2), count(DISTINCT KEY._col0:12._col0, KEY._col0:12._col1, KEY._col0:12._col2), count(DISTINCT KEY._col0:13._col0, KEY._col0:13._col1, KEY._col0:13._col2), count(DISTINCT KEY._col0:14._col0, KEY._col0:14._col1, KEY._col0:14._col2, KEY._col0:14._col3)
diff --git ql/src/test/results/clientpositive/vector_grouping_sets.q.out ql/src/test/results/clientpositive/vector_grouping_sets.q.out
index 58d1f87..5f296aa 100644
--- ql/src/test/results/clientpositive/vector_grouping_sets.q.out
+++ ql/src/test/results/clientpositive/vector_grouping_sets.q.out
@@ -149,6 +149,7 @@ STAGE PLANS:
 outputColumnNames: s_store_id
 Statistics: Num rows: 12 Data size: 25632 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
+ grouping sets: 0, 1
 keys: s_store_id (type: string), 0 (type: int)
 mode: hash
 outputColumnNames: _col0, _col1
@@ -158,6 +159,7 @@ STAGE PLANS:
 sort order: ++
 Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
 Statistics: Num rows: 24 Data size: 51264 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
 Reduce Operator Tree:
 Group By Operator
 keys: KEY._col0 (type: string), KEY._col1 (type: int)
@@ -224,6 +226,7 @@ STAGE PLANS:
 outputColumnNames: _col0
 Statistics: Num rows: 12 Data size: 25632 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
+ grouping sets: 0, 1
 keys: _col0 (type: string), 0 (type: int)
 mode: hash
 outputColumnNames: _col0, _col1
@@ -233,6 +236,7 @@ STAGE PLANS:
 sort order: ++
 Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
 Statistics: Num rows: 24 Data size: 51264 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
 Reduce Operator Tree:
 Group By Operator
 keys: KEY._col0 (type: string), KEY._col1 (type: int)
@@ -302,6 +306,7 @@ STAGE PLANS:
 outputColumnNames: _col0
 Statistics: Num rows: 12 Data size: 25632 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
+ grouping sets: 0, 1
 keys: _col0 (type: string), 0 (type: int)
 mode: hash
 outputColumnNames: _col0, _col1
@@ -311,6 +316,7 @@ STAGE PLANS:
 sort order: ++
 Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
 Statistics: Num rows: 24 Data size: 51264 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
 Reduce Operator Tree:
 Group By Operator
 keys: KEY._col0 (type: string), KEY._col1 (type: int)
diff --git ql/src/test/results/clientpositive/vectorization_15.q.out ql/src/test/results/clientpositive/vectorization_15.q.out
index 5de2092..969a74b 100644
--- ql/src/test/results/clientpositive/vectorization_15.q.out
+++ ql/src/test/results/clientpositive/vectorization_15.q.out
@@ -215,15 +215,15 @@ POSTHOOK: Input: default@alltypesorc
 -51.0 true NULL QiOcvR0kt6r7f0R7fiPxQTCU -51 266531954 1969-12-31 16:00:08.451 0.0 -266531980.28 NULL NULL 33.0 0.0 0.0 NULL 51 0.0 2.66532E8 -23 266531980.28 0.0
 -51.0 true NULL Ybpj38RTTYl7CnJXPNx1g4C -51 -370919370 1969-12-31 16:00:08.451 0.0 370919343.72 NULL NULL 33.0 0.0 0.0 NULL 51 0.0 -3.70919296E8 -23 -370919343.72 0.0
 -6.0 NULL -200.0 NULL -6 NULL 1969-12-31 15:59:56.094 0.0 NULL -200.0 -15910.599999999999 3.0 0.0 0.0 -23.0 6 NULL NULL -5 NULL NULL
--62.0 NULL 15601.0 NULL -62 NULL 1969-12-31 16:00:09.889 0.0 NULL 15601.0 1241106.353 33.0 0.0 0.0 -23.0 62 NULL NULL -23 NULL NULL
+-62.0 NULL 15601.0 NULL -62 NULL 1969-12-31 15:59:56.527 0.0 NULL 15601.0 1241106.353 33.0 0.0 0.0 -23.0 62 NULL NULL -23 NULL NULL
 11.0 false NULL 10pO8p1LNx4Y 11 271296824 1969-12-31 16:00:02.351 0.0 -271296850.28 NULL NULL 0.0 0.0 0.0 NULL -11 0.0 2.71296832E8 -1 271296850.28 0.0
 11.0 false NULL 1H6wGP 11 -560827082 1969-12-31 16:00:02.351 0.0 560827055.72 NULL NULL 0.0 0.0 0.0 NULL -11 0.0 -5.6082707E8 -1 -560827055.72 0.0
 11.0 false NULL 2a7V63IL7jK3o 11 -325931647 1969-12-31 16:00:02.351 0.0 325931620.72 NULL NULL 0.0 0.0 0.0 NULL -11 0.0 -3.25931648E8 -1 -325931620.72 0.0
 11.0 true NULL 10 11 92365813 1969-12-31 16:00:02.351 0.0 -92365839.28 NULL NULL 0.0 0.0 0.0 NULL -11 0.0 9.2365808E7 -1 92365839.28 0.0
-21.0 NULL 15601.0 NULL 21 NULL 1969-12-31 16:00:14.256 0.0 NULL 15601.0 1241106.353 12.0 0.0 0.0 -23.0 -21 NULL NULL -2 NULL NULL
+21.0 NULL 15601.0 NULL 21 NULL 1969-12-31 15:59:56.527 0.0 NULL 15601.0 1241106.353 12.0 0.0 0.0 -23.0 -21 NULL NULL -2 NULL NULL
 32.0 NULL -200.0 NULL 32 NULL 1969-12-31 16:00:02.445 0.0 NULL -200.0 -15910.599999999999 1.0 0.0 0.0 -23.0 -32 NULL NULL -23 NULL NULL
 36.0 NULL -200.0 NULL 36 NULL 1969-12-31 16:00:00.554 0.0 NULL -200.0 -15910.599999999999 33.0 0.0 0.0 -23.0 -36 NULL NULL -23 NULL NULL
-5.0 NULL 15601.0 NULL 5 NULL 1969-12-31 16:00:00.959 0.0 NULL 15601.0 1241106.353 3.0 0.0 0.0 -23.0 -5 NULL NULL -3 NULL NULL
+5.0 NULL 15601.0 NULL 5 NULL 1969-12-31 15:59:56.527 0.0 NULL 15601.0 1241106.353 3.0 0.0 0.0 -23.0 -5 NULL NULL -3 NULL NULL
 58.0 NULL 15601.0 NULL 58 NULL 1969-12-31 15:59:56.527 0.0 NULL 15601.0 1241106.353 33.0 0.0 0.0 -23.0 -58 NULL NULL -23 NULL NULL
 8.0 false NULL 10V3pN5r5lI2qWl2lG103 8 -362835731 1969-12-31 16:00:15.892 0.0 362835704.72 NULL NULL 1.0 0.0 0.0 NULL -8 0.0 -3.62835744E8 -7 -362835704.72 0.0
 8.0 false NULL 10c4qt584m5y6uWT 8 -183000142 1969-12-31 16:00:15.892 0.0 183000115.72 NULL NULL 1.0 0.0 0.0 NULL -8 0.0 -1.8300016E8 -7 -183000115.72 0.0
diff --git ql/src/test/results/clientpositive/vectorization_limit.q.out ql/src/test/results/clientpositive/vectorization_limit.q.out
index 39ea939..3466f03 100644
--- ql/src/test/results/clientpositive/vectorization_limit.q.out
+++ ql/src/test/results/clientpositive/vectorization_limit.q.out
@@ -340,7 +340,6 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: tinyint)
 Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
 TopN Hash Memory Usage: 0.3
- Execution mode: vectorized
 Reduce Operator Tree:
 Group By Operator
 aggregations: count(DISTINCT KEY._col1:0._col0)
diff --git ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out
index f8ae962..21f933f 100644
--- ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out
+++ ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out
@@ -46,7 +46,6 @@ STAGE PLANS:
 key expressions: _col0 (type: int)
 sort order: +
 Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE
- Execution mode: vectorized
 Reduce Operator Tree:
 Group By Operator
 aggregations: sum(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0)
@@ -105,7 +104,6 @@ STAGE PLANS:
 key expressions: _col0 (type: int)
 sort order: +
 Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
- Execution mode: vectorized
 Reduce Operator Tree:
 Group By Operator
 aggregations: sum(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), avg(DISTINCT KEY._col0:2._col0), std(DISTINCT KEY._col0:3._col0)
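
Note on the plan changes above: the new "grouping sets: ..." lines under the map-side Group By Operator make explicit which grouping-set IDs the operator already evaluates. The IDs can be read straight off the hunks: the groupby_cube1.q.out plans (two grouping keys with CUBE) list all four combinations as 0, 1, 2, 3, the groupby_rollup1.q.out and limit_pushdown2.q.out plans list 0, 1, 3, and the single-key vector_grouping_sets.q.out plans list 0, 1. The HiveQL below is a minimal, illustrative sketch of the kind of query behind those plans; the table name t1 and columns key and val are assumptions for the example, not copied from the tests.

    -- Two grouping keys with CUBE: four grouping sets, which the updated
    -- EXPLAIN output lists as "grouping sets: 0, 1, 2, 3".
    EXPLAIN
    SELECT key, val, count(1)
    FROM t1
    GROUP BY key, val WITH CUBE;

    -- The same aggregation with ROLLUP keeps three of the four sets,
    -- shown as "grouping sets: 0, 1, 3" in the plan.
    EXPLAIN
    SELECT key, val, count(1)
    FROM t1
    GROUP BY key, val WITH ROLLUP;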