diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java index 4632f08..2e40556 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java @@ -18,7 +18,6 @@ package org.apache.hadoop.hive.ql.exec; -import java.io.Serializable; import java.lang.management.ManagementFactory; import java.lang.management.MemoryMXBean; import java.lang.reflect.Field; @@ -34,15 +33,12 @@ import javolution.util.FastBitSet; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.parse.OpParseContext; import org.apache.hadoop.hive.ql.plan.AggregationDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.GroupByDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; @@ -72,115 +68,110 @@ /** * GroupBy operator implementation. */ -public class GroupByOperator extends Operator implements - Serializable { +public class GroupByOperator extends Operator { - private static final Log LOG = LogFactory.getLog(GroupByOperator.class - .getName()); - private static final boolean isTraceEnabled = LOG.isTraceEnabled(); private static final long serialVersionUID = 1L; private static final int NUMROWSESTIMATESIZE = 1000; - protected transient ExprNodeEvaluator[] keyFields; - protected transient ObjectInspector[] keyObjectInspectors; + private transient ExprNodeEvaluator[] keyFields; + private transient ObjectInspector[] keyObjectInspectors; - protected transient ExprNodeEvaluator[][] aggregationParameterFields; - protected transient ObjectInspector[][] aggregationParameterObjectInspectors; - protected transient ObjectInspector[][] aggregationParameterStandardObjectInspectors; - protected transient Object[][] aggregationParameterObjects; + private transient ExprNodeEvaluator[][] aggregationParameterFields; + private transient ObjectInspector[][] aggregationParameterObjectInspectors; + private transient ObjectInspector[][] aggregationParameterStandardObjectInspectors; + private transient Object[][] aggregationParameterObjects; + // so aggregationIsDistinct is a boolean array instead of a single number. - protected transient boolean[] aggregationIsDistinct; + private transient boolean[] aggregationIsDistinct; // Map from integer tag to distinct aggrs - transient protected Map> distinctKeyAggrs = + private transient Map> distinctKeyAggrs = new HashMap>(); // Map from integer tag to non-distinct aggrs with key parameters. - transient protected Map> nonDistinctKeyAggrs = + private transient Map> nonDistinctKeyAggrs = new HashMap>(); // List of non-distinct aggrs. 
- transient protected List nonDistinctAggrs = new ArrayList(); + private transient List nonDistinctAggrs = new ArrayList(); // Union expr for distinct keys - transient ExprNodeEvaluator unionExprEval = null; + private transient ExprNodeEvaluator unionExprEval; - transient GenericUDAFEvaluator[] aggregationEvaluators; - transient boolean[] estimableAggregationEvaluators; - - protected transient ArrayList objectInspectors; - transient ArrayList fieldNames; + private transient GenericUDAFEvaluator[] aggregationEvaluators; + private transient boolean[] estimableAggregationEvaluators; // Used by sort-based GroupBy: Mode = COMPLETE, PARTIAL1, PARTIAL2, // MERGEPARTIAL - protected transient KeyWrapper currentKeys; - protected transient KeyWrapper newKeys; - protected transient AggregationBuffer[] aggregations; - protected transient Object[][] aggregationsParametersLastInvoke; + private transient KeyWrapper currentKeys; + private transient KeyWrapper newKeys; + private transient AggregationBuffer[] aggregations; + private transient Object[][] aggregationsParametersLastInvoke; // Used by hash-based GroupBy: Mode = HASH, PARTIALS - protected transient HashMap hashAggregations; + private transient HashMap hashAggregations; // Used by hash distinct aggregations when hashGrpKeyNotRedKey is true - protected transient HashSet keysCurrentGroup; + private transient HashSet keysCurrentGroup; - transient boolean firstRow; - transient long totalMemory; - protected transient boolean hashAggr; + private transient boolean firstRow; + private transient boolean hashAggr; // The reduction is happening on the reducer, and the grouping key and // reduction keys are different. // For example: select a, count(distinct b) from T group by a // The data is sprayed by 'b' and the reducer is grouping it by 'a' - transient boolean groupKeyIsNotReduceKey; - transient boolean firstRowInGroup; - transient long numRowsInput; - transient long numRowsHashTbl; - transient int groupbyMapAggrInterval; - transient long numRowsCompareHashAggr; - transient float minReductionHashAggr; + private transient boolean groupKeyIsNotReduceKey; + private transient boolean firstRowInGroup; + private transient long numRowsInput; + private transient long numRowsHashTbl; + private transient int groupbyMapAggrInterval; + private transient long numRowsCompareHashAggr; + private transient float minReductionHashAggr; + + private transient int outputKeyLength; // current Key ObjectInspectors are standard ObjectInspectors - protected transient ObjectInspector[] currentKeyObjectInspectors; - // new Key ObjectInspectors are objectInspectors from the parent - transient StructObjectInspector newKeyObjectInspector; - transient StructObjectInspector currentKeyObjectInspector; - public static MemoryMXBean memoryMXBean; + private transient ObjectInspector[] currentKeyObjectInspectors; - /** - * Total amount of memory allowed for JVM heap. - */ - protected long maxMemory; + private transient MemoryMXBean memoryMXBean; - /** - * configure percent of memory threshold usable by QP. 
- */ - protected float memoryThreshold; - - private boolean groupingSetsPresent; - private int groupingSetsPosition; - private List groupingSets; - private List groupingSetsBitSet; - transient private List newKeysGroupingSets; + private transient boolean groupingSetsPresent; // generates grouping set + private transient int groupingSetsPosition; // position of grouping set, generally the last of keys + private transient List groupingSets; // declared grouping set values + private transient FastBitSet[] groupingSetsBitSet; // bitsets acquired from grouping set values + private transient Text[] newKeysGroupingSets; // for these positions, some variable primitive type (String) is used, so size // cannot be estimated. sample it at runtime. - transient List keyPositionsSize; + private transient List keyPositionsSize; // for these positions, some variable primitive type (String) is used for the // aggregation classes - transient List[] aggrPositions; + private transient List[] aggrPositions; + + private transient int fixedRowSize; + + private transient int totalVariableSize; + private transient int numEntriesVarSize; + + private transient int countAfterReport; // report or forward + private transient int heartbeatInterval; - transient int fixedRowSize; + /** + * Total amount of memory allowed for JVM heap. + */ + protected transient long maxMemory; /** * Max memory usable by the hashtable before it should flush. */ protected transient long maxHashTblMemory; - transient int totalVariableSize; - transient int numEntriesVarSize; + + /** + * configure percent of memory threshold usable by QP. + */ + protected transient float memoryThreshold; /** * Current number of entries in the hash table. */ protected transient int numEntriesHashTable; - transient int countAfterReport; // report or forward - transient int heartbeatInterval; public static FastBitSet groupingSet2BitSet(int value) { FastBitSet bits = new FastBitSet(); @@ -197,7 +188,6 @@ public static FastBitSet groupingSet2BitSet(int value) { @Override protected void initializeOp(Configuration hconf) throws HiveException { - totalMemory = Runtime.getRuntime().totalMemory(); numRowsInput = 0; numRowsHashTbl = 0; @@ -226,16 +216,15 @@ protected void initializeOp(Configuration hconf) throws HiveException { if (groupingSetsPresent) { groupingSets = conf.getListGroupingSets(); groupingSetsPosition = conf.getGroupingSetPosition(); - newKeysGroupingSets = new ArrayList(); - groupingSetsBitSet = new ArrayList(); + newKeysGroupingSets = new Text[groupingSets.size()]; + groupingSetsBitSet = new FastBitSet[groupingSets.size()]; + int pos = 0; for (Integer groupingSet: groupingSets) { // Create the mapping corresponding to the grouping set - ExprNodeEvaluator groupingSetValueEvaluator = - ExprNodeEvaluatorFactory.get(new ExprNodeConstantDesc(String.valueOf(groupingSet))); - - newKeysGroupingSets.add(groupingSetValueEvaluator.evaluate(null)); - groupingSetsBitSet.add(groupingSet2BitSet(groupingSet)); + newKeysGroupingSets[pos] = new Text(String.valueOf(groupingSet)); + groupingSetsBitSet[pos] = groupingSet2BitSet(groupingSet); + pos++; } } @@ -348,23 +337,12 @@ protected void initializeOp(Configuration hconf) throws HiveException { aggregationEvaluators[i] = agg.getGenericUDAFEvaluator(); } - // init objectInspectors - int totalFields = keyFields.length + aggregationEvaluators.length; - objectInspectors = new ArrayList(totalFields); - for (ExprNodeEvaluator keyField : keyFields) { - objectInspectors.add(null); - } MapredContext context = MapredContext.get(); if 
(context != null) { for (GenericUDAFEvaluator genericUDAFEvaluator : aggregationEvaluators) { context.setup(genericUDAFEvaluator); } } - for (int i = 0; i < aggregationEvaluators.length; i++) { - ObjectInspector roi = aggregationEvaluators[i].init(conf.getAggregators() - .get(i).getMode(), aggregationParameterObjectInspectors[i]); - objectInspectors.add(roi); - } aggregationsParametersLastInvoke = new Object[conf.getAggregators().size()][]; if ((conf.getMode() != GroupByDesc.Mode.HASH || conf.getBucketGroup()) && @@ -390,26 +368,25 @@ protected void initializeOp(Configuration hconf) throws HiveException { } } - fieldNames = conf.getOutputColumnNames(); + List fieldNames = new ArrayList(conf.getOutputColumnNames()); - for (int i = 0; i < keyFields.length; i++) { - objectInspectors.set(i, currentKeyObjectInspectors[i]); - } + // grouping id should be pruned, which is the last of key columns + // see ColumnPrunerGroupByProc + outputKeyLength = conf.pruneGroupingSetId() ? keyFields.length - 1 : keyFields.length; - // Generate key names - ArrayList keyNames = new ArrayList(keyFields.length); - for (int i = 0; i < keyFields.length; i++) { - keyNames.add(fieldNames.get(i)); + // init objectInspectors + ObjectInspector[] objectInspectors = + new ObjectInspector[outputKeyLength + aggregationEvaluators.length]; + for (int i = 0; i < outputKeyLength; i++) { + objectInspectors[i] = currentKeyObjectInspectors[i]; + } + for (int i = 0; i < aggregationEvaluators.length; i++) { + objectInspectors[outputKeyLength + i] = aggregationEvaluators[i].init(conf.getAggregators() + .get(i).getMode(), aggregationParameterObjectInspectors[i]); } - newKeyObjectInspector = ObjectInspectorFactory - .getStandardStructObjectInspector(keyNames, Arrays - .asList(keyObjectInspectors)); - currentKeyObjectInspector = ObjectInspectorFactory - .getStandardStructObjectInspector(keyNames, Arrays - .asList(currentKeyObjectInspectors)); outputObjInspector = ObjectInspectorFactory - .getStandardStructObjectInspector(fieldNames, objectInspectors); + .getStandardStructObjectInspector(fieldNames, Arrays.asList(objectInspectors)); KeyWrapperFactory keyWrapperFactory = new KeyWrapperFactory(keyFields, keyObjectInspectors, currentKeyObjectInspectors); @@ -769,7 +746,7 @@ public void processOp(Object row, int tag) throws HiveException { flushHashTable(true); hashAggr = false; } else { - if (isTraceEnabled) { + if (isLogTraceEnabled) { LOG.trace("Hash Aggr Enabled: #hash table = " + numRowsHashTbl + " #total = " + numRowsInput + " reduction = " + 1.0 * (numRowsHashTbl / numRowsInput) + " minReduction = " @@ -795,14 +772,14 @@ public void processOp(Object row, int tag) throws HiveException { newKeysArray[keyPos] = null; } - FastBitSet bitset = groupingSetsBitSet.get(groupingSetPos); + FastBitSet bitset = groupingSetsBitSet[groupingSetPos]; // Some keys need to be left to null corresponding to that grouping set. 
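The hunk above also replaces the List-based grouping-set lookups with plain arrays (Text[] newKeysGroupingSets, FastBitSet[] groupingSetsBitSet) and expands each input row once per declared grouping set. A minimal stand-alone sketch of that expansion follows; the class and method names are hypothetical, while FastBitSet and Text are the same types the patch uses.

    import javolution.util.FastBitSet;
    import org.apache.hadoop.io.Text;

    // Hypothetical illustration (not the patched method itself): build one key
    // array per grouping set by starting from all-null keys, copying back the
    // positions set in that grouping set's bitset, and writing the grouping-set
    // id into the extra key slot.
    public class GroupingSetExpansionSketch {
      public static Object[][] expand(Object[] keys, int groupingSetsPosition,
                                      FastBitSet[] bitSets, Text[] groupingSetIds) {
        Object[][] expanded = new Object[bitSets.length][];
        for (int g = 0; g < bitSets.length; g++) {
          Object[] copy = new Object[keys.length];            // all positions start null
          FastBitSet bits = bitSets[g];
          for (int pos = bits.nextSetBit(0); pos >= 0; pos = bits.nextSetBit(pos + 1)) {
            copy[pos] = keys[pos];                            // keep keys in this grouping set
          }
          copy[groupingSetsPosition] = groupingSetIds[g];     // grouping-set id key
          expanded[g] = copy;
        }
        return expanded;
      }
    }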
for (int keyPos = bitset.nextSetBit(0); keyPos >= 0; keyPos = bitset.nextSetBit(keyPos+1)) { newKeysArray[keyPos] = cloneNewKeysArray[keyPos]; } - newKeysArray[groupingSetsPosition] = newKeysGroupingSets.get(groupingSetPos); + newKeysArray[groupingSetsPosition] = newKeysGroupingSets[groupingSetPos]; processKey(row, rowInspector); } } else { @@ -972,7 +949,7 @@ private boolean shouldBeFlushed(KeyWrapper newKeys) { // Update the number of entries that can fit in the hash table numEntriesHashTable = (int) (maxHashTblMemory / (fixedRowSize + (totalVariableSize / numEntriesVarSize))); - if (isTraceEnabled) { + if (isLogTraceEnabled) { LOG.trace("Hash Aggr: #hash table = " + numEntries + " #max in hash table = " + numEntriesHashTable); } @@ -1054,19 +1031,17 @@ private void flushHashTable(boolean complete) throws HiveException { * The keys in the record * @throws HiveException */ - protected void forward(Object[] keys, - AggregationBuffer[] aggs) throws HiveException { + private void forward(Object[] keys, AggregationBuffer[] aggs) throws HiveException { - int totalFields = keys.length + aggs.length; if (forwardCache == null) { - forwardCache = new Object[totalFields]; + forwardCache = new Object[outputKeyLength + aggs.length]; } - for (int i = 0; i < keys.length; i++) { + for (int i = 0; i < outputKeyLength; i++) { forwardCache[i] = keys[i]; } for (int i = 0; i < aggs.length; i++) { - forwardCache[keys.length + i] = aggregationEvaluators[i].evaluate(aggs[i]); + forwardCache[outputKeyLength + i] = aggregationEvaluators[i].evaluate(aggs[i]); } forward(forwardCache, outputObjInspector); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java index 90b4b12..918c299 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java @@ -70,6 +70,7 @@ * Key vector expressions. */ private VectorExpression[] keyExpressions; + private int outputKeyLength; private boolean isVectorOutput; @@ -768,9 +769,16 @@ protected void initializeOp(Configuration hconf) throws HiveException { List keysDesc = conf.getKeys(); try { - keyOutputWriters = new VectorExpressionWriter[keyExpressions.length]; + List outputFieldNames = conf.getOutputColumnNames(); + + // grouping id should be pruned, which is the last of key columns + // see ColumnPrunerGroupByProc + outputKeyLength = + conf.pruneGroupingSetId() ? keyExpressions.length - 1 : keyExpressions.length; + + keyOutputWriters = new VectorExpressionWriter[outputKeyLength]; - for(int i = 0; i < keyExpressions.length; ++i) { + for(int i = 0; i < outputKeyLength; ++i) { keyOutputWriters[i] = VectorExpressionWriterFactory. 
genVectorExpressionWritable(keysDesc.get(i)); objectInspectors.add(keyOutputWriters[i].getObjectInspector()); @@ -788,7 +796,6 @@ protected void initializeOp(Configuration hconf) throws HiveException { aggregationBatchInfo.compileAggregationBatchInfo(aggregators); } LOG.warn("VectorGroupByOperator is vector output " + isVectorOutput); - List outputFieldNames = conf.getOutputColumnNames(); outputObjInspector = ObjectInspectorFactory.getStandardStructObjectInspector( outputFieldNames, objectInspectors); if (isVectorOutput) { @@ -807,9 +814,9 @@ protected void initializeOp(Configuration hconf) throws HiveException { initializeChildren(hconf); - forwardCache = new Object[keyExpressions.length + aggregators.length]; + forwardCache = new Object[outputKeyLength + aggregators.length]; - if (keyExpressions.length == 0) { + if (outputKeyLength == 0) { processingMode = this.new ProcessingModeGlobalAggregate(); } else if (conf.getVectorDesc().isVectorGroupBatches()) { // Sorted GroupBy of vector batches where an individual batch has the same group key (e.g. reduce). @@ -872,7 +879,7 @@ private void writeSingleRow(VectorHashKeyWrapper kw, VectorAggregationBufferRow int fi = 0; if (!isVectorOutput) { // Output row. - for (int i = 0; i < keyExpressions.length; ++i) { + for (int i = 0; i < outputKeyLength; ++i) { forwardCache[fi++] = keyWrappersBatch.getWritableKeyValue ( kw, i, keyOutputWriters[i]); } @@ -886,7 +893,7 @@ private void writeSingleRow(VectorHashKeyWrapper kw, VectorAggregationBufferRow forward(forwardCache, outputObjInspector); } else { // Output keys and aggregates into the output batch. - for (int i = 0; i < keyExpressions.length; ++i) { + for (int i = 0; i < outputKeyLength; ++i) { vectorColumnAssign[fi++].assignObjectValue(keyWrappersBatch.getWritableKeyValue ( kw, i, keyOutputWriters[i]), outputBatch.size); } @@ -910,7 +917,7 @@ private void writeSingleRow(VectorHashKeyWrapper kw, VectorAggregationBufferRow */ private void writeGroupRow(VectorAggregationBufferRow agg, DataOutputBuffer buffer) throws HiveException { - int fi = keyExpressions.length; // Start after group keys. + int fi = outputKeyLength; // Start after group keys. 
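Both GroupByOperator and VectorGroupByOperator now derive outputKeyLength from the (possibly pruned) output schema and size their forwarded row from it. A minimal sketch of that sizing rule, with hypothetical helper names (OutputWidthSketch, outputKeyLength, newForwardCache):

    // Hypothetical helper mirroring the rule the patch applies in both operators:
    // when GroupByDesc.pruneGroupingSetId() is true, the last key (the grouping-set
    // id) is dropped from the forwarded row, and the row is keys followed by one
    // evaluated value per aggregation.
    public final class OutputWidthSketch {
      static int outputKeyLength(int keyCount, boolean pruneGroupingSetId) {
        return pruneGroupingSetId ? keyCount - 1 : keyCount;
      }
      static Object[] newForwardCache(int keyCount, int aggCount, boolean pruneGroupingSetId) {
        return new Object[outputKeyLength(keyCount, pruneGroupingSetId) + aggCount];
      }
    }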
for (int i = 0; i < aggregators.length; ++i) { vectorColumnAssign[fi++].assignObjectValue(aggregators[i].evaluateOutput( agg.getAggregationBuffer(i)), outputBatch.size); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java index afd1738..abf32f1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java @@ -141,6 +141,17 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, colLists = Utilities.mergeUniqElems(colLists, param.getCols()); } } + int groupingSetPosition = conf.getGroupingSetPosition(); + if (groupingSetPosition >= 0) { + List cols = cppCtx.genColLists(op); + String groupingColumn = conf.getOutputColumnNames().get(groupingSetPosition); + if (!cols.contains(groupingColumn)) { + conf.getOutputColumnNames().remove(groupingSetPosition); + if (op.getSchema() != null) { + op.getSchema().getSignature().remove(groupingSetPosition); + } + } + } cppCtx.getPrunedColLists().put(op, colLists); return null; diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingInferenceOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingInferenceOptimizer.java index 87fba2d..7954767 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingInferenceOptimizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingInferenceOptimizer.java @@ -103,7 +103,7 @@ private void inferBucketingSorting(List mapRedTasks) throws Semantic Map opRules = new LinkedHashMap(); opRules.put(new RuleRegExp("R1", SelectOperator.getOperatorName() + "%"), BucketingSortingOpProcFactory.getSelProc()); - // Matches only GroupByOpeartors which are reducers, rather than map group by operators, + // Matches only GroupByOperators which are reducers, rather than map group by operators, // or multi group by optimization specific operators opRules.put(new RuleExactMatch("R2", GroupByOperator.getOperatorName() + "%"), BucketingSortingOpProcFactory.getGroupByProc()); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingOpProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingOpProcFactory.java index 82f4243..cf02bec 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingOpProcFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingOpProcFactory.java @@ -45,6 +45,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; +import org.apache.hadoop.hive.ql.plan.GroupByDesc; import org.apache.hadoop.hive.ql.plan.JoinDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc; @@ -669,7 +670,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, processGroupByReduceSink((ReduceSinkOperator) rop, gop, bctx); - return processGroupBy((ReduceSinkOperator)rop , gop, bctx); + return processGroupBy(rop , gop, bctx); } /** @@ -683,12 +684,16 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, protected void processGroupByReduceSink(ReduceSinkOperator rop, GroupByOperator gop, BucketingSortingCtx bctx){ + GroupByDesc groupByDesc = gop.getConf(); String sortOrder = 
rop.getConf().getOrder(); List bucketCols = new ArrayList(); List sortCols = new ArrayList(); assert rop.getConf().getKeyCols().size() <= rop.getSchema().getSignature().size(); // Group by operators select the key cols, so no need to find them in the values for (int i = 0; i < rop.getConf().getKeyCols().size(); i++) { + if (groupByDesc.pruneGroupingSetId() && groupByDesc.getGroupingSetPosition() == i) { + continue; + } String colName = rop.getSchema().getSignature().get(i).getInternalName(); bucketCols.add(new BucketCol(colName, i)); sortCols.add(new SortCol(colName, i, sortOrder.charAt(i))); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SparkMapJoinResolver.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SparkMapJoinResolver.java index 05748a1..69004dc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SparkMapJoinResolver.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SparkMapJoinResolver.java @@ -74,12 +74,12 @@ public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException { // Check whether the specified BaseWork's operator tree contains a operator // of the specified operator class private boolean containsOp(BaseWork work, Class clazz) { - Set> matchingOps = getOp(work, clazz); + Set> matchingOps = getOp(work, clazz); return matchingOps != null && !matchingOps.isEmpty(); } - public static Set> getOp(BaseWork work, Class clazz) { - Set> ops = new HashSet>(); + public static Set> getOp(BaseWork work, Class clazz) { + Set> ops = new HashSet>(); if (work instanceof MapWork) { Collection> opSet = ((MapWork) work).getAliasToWork().values(); Stack> opStack = new Stack>(); @@ -184,7 +184,7 @@ private void generateLocalWork(SparkTask originalTask) { Context ctx = physicalContext.getContext(); for (BaseWork work : allBaseWorks) { - Set> ops = getOp(work, MapJoinOperator.class); + Set> ops = getOp(work, MapJoinOperator.class); if (ops == null || ops.isEmpty()) { continue; } @@ -213,7 +213,7 @@ private void generateLocalWork(SparkTask originalTask) { } for (BaseWork parentWork : originalWork.getParents(work)) { - Set> hashTableSinkOps = + Set> hashTableSinkOps = getOp(parentWork, SparkHashTableSinkOperator.class); if (hashTableSinkOps == null || hashTableSinkOps.isEmpty()) { continue; diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index b93a293..c8fc4e8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -2640,7 +2640,7 @@ private Operator genNotNullFilterForJoinSourcePlan(QB qb, Operator input, @SuppressWarnings("nls") // TODO: make aliases unique, otherwise needless rewriting takes place - Integer genColListRegex(String colRegex, String tabAlias, ASTNode sel, + Integer genColListRegex(String colRegex, String tabAlias, ASTNode sel, ArrayList col_list, HashSet excludeCols, RowResolver input, RowResolver colSrcRR, Integer pos, RowResolver output, List aliases, boolean ensureUniqueCols) throws SemanticException { @@ -3936,7 +3936,7 @@ private Operator genGroupByPlanGroupByOperator(QBParseInfo parseInfo, Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild( new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations, - false, groupByMemoryUsage, memoryThreshold, null, false, 0, numDistinctUDFs > 0), + false, groupByMemoryUsage, memoryThreshold, null, false, -1, numDistinctUDFs > 0), new 
RowSchema(groupByOutputRowResolver.getColumnInfos()), input), groupByOutputRowResolver); op.setColumnExprMap(colExprMap); @@ -4061,10 +4061,11 @@ private Operator genGroupByPlanGroupByOperator1(QBParseInfo parseInfo, } // This is only needed if a new grouping set key is being created - int groupingSetsPosition = 0; + int groupingSetsPosition = -1; // For grouping sets, add a dummy grouping key if (groupingSetsPresent) { + groupingSetsPosition = groupByKeys.size(); // Consider the query: select a,b, count(1) from T group by a,b with cube; // where it is being executed in a single map-reduce job // The plan is TableScan -> GroupBy1 -> ReduceSink -> GroupBy2 -> FileSink @@ -4079,7 +4080,6 @@ private Operator genGroupByPlanGroupByOperator1(QBParseInfo parseInfo, colExprMap); } else { - groupingSetsPosition = groupByKeys.size(); // The grouping set has not yet been processed. Create a new grouping key // Consider the query: select a,b, count(1) from T group by a,b with cube; // where it is being executed in 2 map-reduce jobs @@ -4295,7 +4295,7 @@ private Operator genGroupByPlanMapGroupByOperator(QB qb, } // The grouping set key is present after the grouping keys, before the distinct keys - int groupingSetsPosition = groupByKeys.size(); + int groupingSetsPosition = -1; // For grouping sets, add a dummy grouping key // This dummy key needs to be added as a reduce key @@ -4307,6 +4307,7 @@ private Operator genGroupByPlanMapGroupByOperator(QB qb, // This function is called for GroupBy1 to create an additional grouping key // for the grouping set (corresponding to the rollup). if (groupingSetsPresent) { + groupingSetsPosition = groupByKeys.size(); createNewGroupingKey(groupByKeys, outputColumnNames, groupByOutputRowResolver, @@ -4863,8 +4864,10 @@ private Operator genGroupByPlanGroupByOperator2MR(QBParseInfo parseInfo, colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1)); } + int groupingSetsPosition = -1; // For grouping sets, add a dummy grouping key if (groupingSetsPresent) { + groupingSetsPosition = groupByKeys.size(); addGroupingSetKey( groupByKeys, groupByInputRowResolver2, @@ -4920,7 +4923,8 @@ private Operator genGroupByPlanGroupByOperator2MR(QBParseInfo parseInfo, Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild( new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations, - false, groupByMemoryUsage, memoryThreshold, null, false, 0, containsDistinctAggr), + false, groupByMemoryUsage, memoryThreshold, null, false, + groupingSetsPosition, containsDistinctAggr), new RowSchema(groupByOutputRowResolver2.getColumnInfos()), reduceSinkOperatorInfo2), groupByOutputRowResolver2); op.setColumnExprMap(colExprMap); @@ -7439,7 +7443,7 @@ private Operator genMapGroupByForSemijoin(QB qb, ArrayList fields, // t .getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD); Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild( new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations, - false, groupByMemoryUsage, memoryThreshold, null, false, 0, false), + false, groupByMemoryUsage, memoryThreshold, null, false, -1, false), new RowSchema(groupByOutputRowResolver.getColumnInfos()), inputOperatorInfo), groupByOutputRowResolver); diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java index 7a0b0da..f031b28 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java @@ -61,7 +61,7 @@ private ArrayList keys; private 
List listGroupingSets; private boolean groupingSetsPresent; - private int groupingSetPosition; + private int groupingSetPosition = -1; private ArrayList aggregators; private ArrayList outputColumnNames; private float groupByMemoryUsage; @@ -177,6 +177,12 @@ public void setKeys(final ArrayList keys) { return outputColumnNames; } + @Explain(displayName = "pruneGroupingSetId", displayOnlyOnTrue = true) + public boolean pruneGroupingSetId() { + return groupingSetPosition >= 0 && + outputColumnNames.size() != keys.size() + aggregators.size(); + } + public void setOutputColumnNames( ArrayList outputColumnNames) { this.outputColumnNames = outputColumnNames; diff --git ql/src/test/queries/clientpositive/groupby_grouping_window.q ql/src/test/queries/clientpositive/groupby_grouping_window.q new file mode 100644 index 0000000..b456074 --- /dev/null +++ ql/src/test/queries/clientpositive/groupby_grouping_window.q @@ -0,0 +1,15 @@ +create table t(category int, live int, comments int); +insert into table t select key, 0, 2 from src tablesample(3 rows); + +explain +select category, max(live) live, max(comments) comments, rank() OVER (PARTITION BY category ORDER BY comments) rank1 +FROM t +GROUP BY category +GROUPING SETS ((), (category)) +HAVING max(comments) > 0; + +select category, max(live) live, max(comments) comments, rank() OVER (PARTITION BY category ORDER BY comments) rank1 +FROM t +GROUP BY category +GROUPING SETS ((), (category)) +HAVING max(comments) > 0; diff --git ql/src/test/results/clientpositive/annotate_stats_groupby.q.out ql/src/test/results/clientpositive/annotate_stats_groupby.q.out index 89dd1de..cb633be 100644 --- ql/src/test/results/clientpositive/annotate_stats_groupby.q.out +++ ql/src/test/results/clientpositive/annotate_stats_groupby.q.out @@ -389,8 +389,9 @@ STAGE PLANS: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 32 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 32 Data size: 2880 Basic stats: COMPLETE Column stats: COMPLETE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1 @@ -446,8 +447,9 @@ STAGE PLANS: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 24 Data size: 2160 Basic stats: COMPLETE Column stats: COMPLETE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1 @@ -503,8 +505,9 @@ STAGE PLANS: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1 @@ -560,8 +563,9 @@ STAGE PLANS: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, 
_col2 - Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1 @@ -617,8 +621,9 @@ STAGE PLANS: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 24 Data size: 2160 Basic stats: COMPLETE Column stats: COMPLETE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1 @@ -674,8 +679,9 @@ STAGE PLANS: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 32 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 32 Data size: 2880 Basic stats: COMPLETE Column stats: COMPLETE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1 @@ -792,8 +798,9 @@ STAGE PLANS: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1 @@ -908,8 +915,9 @@ STAGE PLANS: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1 Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1 @@ -965,8 +973,9 @@ STAGE PLANS: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1 Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1 @@ -1022,8 +1031,9 @@ STAGE PLANS: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1 Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1 @@ -1079,8 +1089,9 @@ STAGE PLANS: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1 Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + 
pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1 @@ -1136,8 +1147,9 @@ STAGE PLANS: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1 Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1 @@ -1193,8 +1205,9 @@ STAGE PLANS: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1 Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1 @@ -1307,8 +1320,9 @@ STAGE PLANS: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1 Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1 diff --git ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out index c3bf0d8..046d204 100644 --- ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out +++ ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out @@ -164,8 +164,9 @@ STAGE PLANS: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1 Statistics: Num rows: 40 Data size: 400 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 @@ -341,8 +342,9 @@ STAGE PLANS: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 8 Data size: 2064 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 1384 Basic stats: COMPLETE Column stats: COMPLETE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 @@ -455,8 +457,9 @@ STAGE PLANS: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 8 Data size: 2064 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 1384 Basic stats: COMPLETE Column stats: COMPLETE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 diff --git ql/src/test/results/clientpositive/groupby_cube1.q.out ql/src/test/results/clientpositive/groupby_cube1.q.out index 4f44d4f..0dc0159 100644 --- ql/src/test/results/clientpositive/groupby_cube1.q.out +++ ql/src/test/results/clientpositive/groupby_cube1.q.out @@ -56,8 +56,9 @@ STAGE PLANS: 
aggregations: count(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 @@ -223,8 +224,9 @@ STAGE PLANS: aggregations: count(DISTINCT KEY._col2:0._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1 @@ -320,8 +322,9 @@ STAGE PLANS: aggregations: count(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: final - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 @@ -403,8 +406,9 @@ STAGE PLANS: aggregations: count(DISTINCT KEY._col2:0._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1 @@ -541,8 +545,9 @@ STAGE PLANS: aggregations: count(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: final - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col1 (type: string), UDFToInteger(_col3) (type: int) outputColumnNames: _col0, _col1, _col2 @@ -608,8 +613,9 @@ STAGE PLANS: aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: final - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col1 (type: string), UDFToInteger(_col3) (type: int) outputColumnNames: _col0, _col1, _col2 diff --git ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out index 0a21dbe..5acac64 100644 --- ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out @@ -79,8 +79,9 @@ STAGE PLANS: aggregations: count(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: final - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) 
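The repeated golden-file change in these plans, where the grouping-id column disappears from outputColumnNames and a pruneGroupingSetId: true line appears, is driven by the predicate added to GroupByDesc earlier in this patch:

    // As added to GroupByDesc.java above: the annotation is printed only when the
    // operator still carries a grouping-set key but its output schema no longer
    // has a column for it.
    @Explain(displayName = "pruneGroupingSetId", displayOnlyOnTrue = true)
    public boolean pruneGroupingSetId() {
      return groupingSetPosition >= 0 &&
          outputColumnNames.size() != keys.size() + aggregators.size();
    }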
outputColumnNames: _col0, _col1, _col2 @@ -185,8 +186,9 @@ STAGE PLANS: aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: final - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: double) outputColumnNames: _col0, _col1, _col2 @@ -313,8 +315,9 @@ STAGE PLANS: aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: final - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 12 Data size: 84 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 diff --git ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out index 3597609..b96521a 100644 --- ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out @@ -74,8 +74,9 @@ STAGE PLANS: aggregations: avg(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + outputColumnNames: _col0, _col1, _col3, _col4 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: double), _col4 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 @@ -185,8 +186,9 @@ STAGE PLANS: aggregations: avg(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: final - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + outputColumnNames: _col0, _col1, _col3, _col4 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: double), _col4 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 diff --git ql/src/test/results/clientpositive/groupby_grouping_sets4.q.out ql/src/test/results/clientpositive/groupby_grouping_sets4.q.out index d1be46d..09cddfb 100644 --- ql/src/test/results/clientpositive/groupby_grouping_sets4.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_sets4.q.out @@ -71,8 +71,9 @@ STAGE PLANS: aggregations: count(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 @@ -148,8 +149,9 @@ STAGE PLANS: aggregations: count(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + 
pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 @@ -273,8 +275,9 @@ STAGE PLANS: aggregations: count(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: final - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 @@ -374,8 +377,9 @@ STAGE PLANS: aggregations: count(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: final - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 diff --git ql/src/test/results/clientpositive/groupby_grouping_sets5.q.out ql/src/test/results/clientpositive/groupby_grouping_sets5.q.out index 6d11add..b2625fe 100644 --- ql/src/test/results/clientpositive/groupby_grouping_sets5.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_sets5.q.out @@ -88,8 +88,9 @@ STAGE PLANS: aggregations: count(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 @@ -228,8 +229,9 @@ STAGE PLANS: aggregations: count(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: final - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 diff --git ql/src/test/results/clientpositive/groupby_grouping_sets6.q.out ql/src/test/results/clientpositive/groupby_grouping_sets6.q.out index d2ff112..d8c732a 100644 --- ql/src/test/results/clientpositive/groupby_grouping_sets6.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_sets6.q.out @@ -55,8 +55,9 @@ STAGE PLANS: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 @@ -140,8 +141,9 @@ STAGE PLANS: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col1 (type: string) 
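The column that makes these outputs shrink is removed by the ColumnPrunerProcFactory hunk earlier in the patch, restated here for reference; the generic type of cols (List<String>) is restored by hand, since the patch text above lost it.

    // If no consumer references the grouping-set id output column, drop it from
    // the GroupBy's output column names and row schema; pruneGroupingSetId()
    // then reports true and the operators shorten their forwarded rows.
    int groupingSetPosition = conf.getGroupingSetPosition();
    if (groupingSetPosition >= 0) {
      List<String> cols = cppCtx.genColLists(op);
      String groupingColumn = conf.getOutputColumnNames().get(groupingSetPosition);
      if (!cols.contains(groupingColumn)) {
        conf.getOutputColumnNames().remove(groupingSetPosition);
        if (op.getSchema() != null) {
          op.getSchema().getSignature().remove(groupingSetPosition);
        }
      }
    }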
outputColumnNames: _col0, _col1 diff --git ql/src/test/results/clientpositive/groupby_grouping_window.q.out ql/src/test/results/clientpositive/groupby_grouping_window.q.out new file mode 100644 index 0000000..b82d2e5 --- /dev/null +++ ql/src/test/results/clientpositive/groupby_grouping_window.q.out @@ -0,0 +1,132 @@ +PREHOOK: query: create table t(category int, live int, comments int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t +POSTHOOK: query: create table t(category int, live int, comments int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t +PREHOOK: query: insert into table t select key, 0, 2 from src tablesample(3 rows) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@t +POSTHOOK: query: insert into table t select key, 0, 2 from src tablesample(3 rows) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.category EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: t.comments SIMPLE [] +POSTHOOK: Lineage: t.live SIMPLE [] +PREHOOK: query: explain +select category, max(live) live, max(comments) comments, rank() OVER (PARTITION BY category ORDER BY comments) rank1 +FROM t +GROUP BY category +GROUPING SETS ((), (category)) +HAVING max(comments) > 0 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select category, max(live) live, max(comments) comments, rank() OVER (PARTITION BY category ORDER BY comments) rank1 +FROM t +GROUP BY category +GROUPING SETS ((), (category)) +HAVING max(comments) > 0 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 3 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: category (type: int), live (type: int), comments (type: int) + outputColumnNames: category, live, comments + Statistics: Num rows: 3 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(live), max(comments) + keys: category (type: int), '0' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0), max(VALUE._col1) + keys: KEY._col0 (type: int), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 3 Data size: 20 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Filter Operator + predicate: (_col3 > 0) (type: boolean) + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key 
expressions: _col0 (type: int), _col3 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _wcol0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select category, max(live) live, max(comments) comments, rank() OVER (PARTITION BY category ORDER BY comments) rank1 +FROM t +GROUP BY category +GROUPING SETS ((), (category)) +HAVING max(comments) > 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: select category, max(live) live, max(comments) comments, rank() OVER (PARTITION BY category ORDER BY comments) rank1 +FROM t +GROUP BY category +GROUPING SETS ((), (category)) +HAVING max(comments) > 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +NULL 0 2 1 +86 0 2 1 +238 0 2 1 +311 0 2 1 diff --git ql/src/test/results/clientpositive/groupby_rollup1.q.out ql/src/test/results/clientpositive/groupby_rollup1.q.out index 0108ce0..cb39bc1 100644 --- ql/src/test/results/clientpositive/groupby_rollup1.q.out +++ ql/src/test/results/clientpositive/groupby_rollup1.q.out @@ -56,8 +56,9 @@ STAGE PLANS: aggregations: count(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 @@ -133,8 +134,9 @@ STAGE PLANS: aggregations: count(DISTINCT KEY._col2:0._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1 @@ -230,8 +232,9 @@ STAGE PLANS: aggregations: count(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: final - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 @@ -307,8 +310,9 @@ STAGE PLANS: aggregations: count(DISTINCT KEY._col2:0._col0) keys: 
KEY._col0 (type: string), KEY._col1 (type: string) mode: complete - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1 @@ -445,8 +449,9 @@ STAGE PLANS: aggregations: count(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: final - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col1 (type: string), UDFToInteger(_col3) (type: int) outputColumnNames: _col0, _col1, _col2 @@ -512,8 +517,9 @@ STAGE PLANS: aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: final - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col1 (type: string), UDFToInteger(_col3) (type: int) outputColumnNames: _col0, _col1, _col2 diff --git ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out index 301b90c..413e7b3 100644 --- ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out +++ ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out @@ -63,8 +63,9 @@ STAGE PLANS: aggregations: count(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 @@ -143,9 +144,9 @@ SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] +Num Buckets: 1 +Bucket Columns: [key, value] +Sort Columns: [Order(col:key, order:1), Order(col:value, order:1)] Storage Desc Params: serialization.format 1 PREHOOK: query: -- Test rollup, should be bucketed and sorted on key, value, grouping_key @@ -250,8 +251,9 @@ STAGE PLANS: aggregations: count(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 @@ -330,9 +332,9 @@ SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] +Num Buckets: 1 +Bucket Columns: [key, value] 
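The table metadata above now reports bucket and sort columns because the BucketingSortingOpProcFactory hunk earlier in this patch skips the grouping-set id key when it has been pruned, so the inferred columns line up with the shortened GroupBy output:

    // From processGroupByReduceSink: the ReduceSink key columns drive the inferred
    // bucketing and sorting, but the pruned grouping-set id key is no longer part
    // of the forwarded row and is skipped.
    for (int i = 0; i < rop.getConf().getKeyCols().size(); i++) {
      if (groupByDesc.pruneGroupingSetId() && groupByDesc.getGroupingSetPosition() == i) {
        continue;
      }
      String colName = rop.getSchema().getSignature().get(i).getInternalName();
      bucketCols.add(new BucketCol(colName, i));
      sortCols.add(new SortCol(colName, i, sortOrder.charAt(i)));
    }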
+Sort Columns: [Order(col:key, order:1), Order(col:value, order:1)] Storage Desc Params: serialization.format 1 PREHOOK: query: -- Test cube, should be bucketed and sorted on key, value, grouping_key @@ -437,8 +439,9 @@ STAGE PLANS: aggregations: count(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 @@ -517,9 +520,9 @@ SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] +Num Buckets: 1 +Bucket Columns: [key, value] +Sort Columns: [Order(col:key, order:1), Order(col:value, order:1)] Storage Desc Params: serialization.format 1 PREHOOK: query: -- Test grouping sets, should be bucketed and sorted on key, value, grouping_key diff --git ql/src/test/results/clientpositive/spark/groupby_cube1.q.out ql/src/test/results/clientpositive/spark/groupby_cube1.q.out index ea2521d..0d37ece 100644 --- ql/src/test/results/clientpositive/spark/groupby_cube1.q.out +++ ql/src/test/results/clientpositive/spark/groupby_cube1.q.out @@ -62,8 +62,9 @@ STAGE PLANS: aggregations: count(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 @@ -241,8 +242,9 @@ STAGE PLANS: aggregations: count(DISTINCT KEY._col2:0._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1 @@ -334,8 +336,9 @@ STAGE PLANS: aggregations: count(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: final - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 @@ -423,8 +426,9 @@ STAGE PLANS: aggregations: count(DISTINCT KEY._col2:0._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1 @@ -562,8 +566,9 @@ STAGE PLANS: aggregations: count(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: final - 
outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col1 (type: string), UDFToInteger(_col3) (type: int) outputColumnNames: _col0, _col1, _col2 @@ -596,8 +601,9 @@ STAGE PLANS: aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: final - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col1 (type: string), UDFToInteger(_col3) (type: int) outputColumnNames: _col0, _col1, _col2 diff --git ql/src/test/results/clientpositive/spark/groupby_rollup1.q.out ql/src/test/results/clientpositive/spark/groupby_rollup1.q.out index 701a62a..291b081 100644 --- ql/src/test/results/clientpositive/spark/groupby_rollup1.q.out +++ ql/src/test/results/clientpositive/spark/groupby_rollup1.q.out @@ -62,8 +62,9 @@ STAGE PLANS: aggregations: count(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 @@ -145,8 +146,9 @@ STAGE PLANS: aggregations: count(DISTINCT KEY._col2:0._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1 @@ -238,8 +240,9 @@ STAGE PLANS: aggregations: count(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: final - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 @@ -321,8 +324,9 @@ STAGE PLANS: aggregations: count(DISTINCT KEY._col2:0._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1 @@ -460,8 +464,9 @@ STAGE PLANS: aggregations: count(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: final - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col1 (type: string), UDFToInteger(_col3) (type: int) outputColumnNames: _col0, _col1, _col2 @@ -494,8 +499,9 @@ STAGE PLANS: aggregations: sum(VALUE._col0) keys: KEY._col0 (type: 
string), KEY._col1 (type: string), KEY._col2 (type: string) mode: final - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col1 (type: string), UDFToInteger(_col3) (type: int) outputColumnNames: _col0, _col1, _col2