diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
index 4632f08..09a707e 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
@@ -42,7 +42,6 @@
 import org.apache.hadoop.hive.ql.parse.OpParseContext;
 import org.apache.hadoop.hive.ql.plan.AggregationDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.GroupByDesc;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
@@ -104,9 +103,6 @@
   transient GenericUDAFEvaluator[] aggregationEvaluators;
   transient boolean[] estimableAggregationEvaluators;
 
-  protected transient ArrayList<ObjectInspector> objectInspectors;
-  transient ArrayList<String> fieldNames;
-
   // Used by sort-based GroupBy: Mode = COMPLETE, PARTIAL1, PARTIAL2,
   // MERGEPARTIAL
   protected transient KeyWrapper currentKeys;
@@ -135,11 +131,10 @@
   transient long numRowsCompareHashAggr;
   transient float minReductionHashAggr;
 
+  transient int outputKeyLength;
+
   // current Key ObjectInspectors are standard ObjectInspectors
   protected transient ObjectInspector[] currentKeyObjectInspectors;
-  // new Key ObjectInspectors are objectInspectors from the parent
-  transient StructObjectInspector newKeyObjectInspector;
-  transient StructObjectInspector currentKeyObjectInspector;
 
   public static MemoryMXBean memoryMXBean;
 
   /**
@@ -155,8 +150,8 @@
   private boolean groupingSetsPresent;
   private int groupingSetsPosition;
   private List<Integer> groupingSets;
-  private List<FastBitSet> groupingSetsBitSet;
-  transient private List<Object> newKeysGroupingSets;
+  private FastBitSet[] groupingSetsBitSet;
+  transient private Text[] newKeysGroupingSets;
 
   // for these positions, some variable primitive type (String) is used, so size
   // cannot be estimated. sample it at runtime.
@@ -226,16 +221,15 @@ protected void initializeOp(Configuration hconf) throws HiveException {
     if (groupingSetsPresent) {
       groupingSets = conf.getListGroupingSets();
       groupingSetsPosition = conf.getGroupingSetPosition();
-      newKeysGroupingSets = new ArrayList<Object>();
-      groupingSetsBitSet = new ArrayList<FastBitSet>();
+      newKeysGroupingSets = new Text[groupingSets.size()];
+      groupingSetsBitSet = new FastBitSet[groupingSets.size()];
+      int pos = 0;
 
       for (Integer groupingSet: groupingSets) {
         // Create the mapping corresponding to the grouping set
-        ExprNodeEvaluator groupingSetValueEvaluator =
-          ExprNodeEvaluatorFactory.get(new ExprNodeConstantDesc(String.valueOf(groupingSet)));
-
-        newKeysGroupingSets.add(groupingSetValueEvaluator.evaluate(null));
-        groupingSetsBitSet.add(groupingSet2BitSet(groupingSet));
+        newKeysGroupingSets[pos] = new Text(String.valueOf(groupingSet));
+        groupingSetsBitSet[pos] = groupingSet2BitSet(groupingSet);
+        pos++;
       }
     }
 
@@ -348,23 +342,12 @@ protected void initializeOp(Configuration hconf) throws HiveException {
       aggregationEvaluators[i] = agg.getGenericUDAFEvaluator();
     }
 
-    // init objectInspectors
-    int totalFields = keyFields.length + aggregationEvaluators.length;
-    objectInspectors = new ArrayList<ObjectInspector>(totalFields);
-    for (ExprNodeEvaluator keyField : keyFields) {
-      objectInspectors.add(null);
-    }
     MapredContext context = MapredContext.get();
     if (context != null) {
       for (GenericUDAFEvaluator genericUDAFEvaluator : aggregationEvaluators) {
         context.setup(genericUDAFEvaluator);
       }
     }
-    for (int i = 0; i < aggregationEvaluators.length; i++) {
-      ObjectInspector roi = aggregationEvaluators[i].init(conf.getAggregators()
-          .get(i).getMode(), aggregationParameterObjectInspectors[i]);
-      objectInspectors.add(roi);
-    }
 
     aggregationsParametersLastInvoke = new Object[conf.getAggregators().size()][];
     if ((conf.getMode() != GroupByDesc.Mode.HASH || conf.getBucketGroup()) &&
@@ -390,26 +373,26 @@ protected void initializeOp(Configuration hconf) throws HiveException {
       }
     }
 
-    fieldNames = conf.getOutputColumnNames();
+    List<String> fieldNames = new ArrayList<String>(conf.getOutputColumnNames());
 
-    for (int i = 0; i < keyFields.length; i++) {
-      objectInspectors.set(i, currentKeyObjectInspectors[i]);
+    // init objectInspectors
+    outputKeyLength = keyFields.length;
+    if (fieldNames.size() != outputKeyLength + aggregationEvaluators.length) {
+      outputKeyLength--;
     }
-
-    // Generate key names
-    ArrayList<String> keyNames = new ArrayList<String>(keyFields.length);
-    for (int i = 0; i < keyFields.length; i++) {
-      keyNames.add(fieldNames.get(i));
+
+    ObjectInspector[] objectInspectors =
+        new ObjectInspector[outputKeyLength + aggregationEvaluators.length];
+    for (int i = 0; i < outputKeyLength; i++) {
+      objectInspectors[i] = currentKeyObjectInspectors[i];
+    }
+    for (int i = 0; i < aggregationEvaluators.length; i++) {
+      objectInspectors[outputKeyLength + i] = aggregationEvaluators[i].init(conf.getAggregators()
+          .get(i).getMode(), aggregationParameterObjectInspectors[i]);
     }
-    newKeyObjectInspector = ObjectInspectorFactory
-        .getStandardStructObjectInspector(keyNames, Arrays
-        .asList(keyObjectInspectors));
-    currentKeyObjectInspector = ObjectInspectorFactory
-        .getStandardStructObjectInspector(keyNames, Arrays
-        .asList(currentKeyObjectInspectors));
     outputObjInspector = ObjectInspectorFactory
-        .getStandardStructObjectInspector(fieldNames, objectInspectors);
+        .getStandardStructObjectInspector(fieldNames, Arrays.asList(objectInspectors));
 
     KeyWrapperFactory keyWrapperFactory =
       new KeyWrapperFactory(keyFields, keyObjectInspectors, currentKeyObjectInspectors);
@@ -795,14 +778,14 @@ public void processOp(Object row, int tag) throws HiveException {
           newKeysArray[keyPos] = null;
         }
 
-        FastBitSet bitset = groupingSetsBitSet.get(groupingSetPos);
+        FastBitSet bitset = groupingSetsBitSet[groupingSetPos];
         // Some keys need to be left to null corresponding to that grouping set.
         for (int keyPos = bitset.nextSetBit(0); keyPos >= 0;
             keyPos = bitset.nextSetBit(keyPos+1)) {
           newKeysArray[keyPos] = cloneNewKeysArray[keyPos];
         }
 
-        newKeysArray[groupingSetsPosition] = newKeysGroupingSets.get(groupingSetPos);
+        newKeysArray[groupingSetsPosition] = newKeysGroupingSets[groupingSetPos];
         processKey(row, rowInspector);
       }
     } else {
@@ -1054,19 +1037,17 @@ private void flushHashTable(boolean complete) throws HiveException {
    *          The keys in the record
    * @throws HiveException
    */
-  protected void forward(Object[] keys,
-      AggregationBuffer[] aggs) throws HiveException {
+  private void forward(Object[] keys, AggregationBuffer[] aggs) throws HiveException {
 
-    int totalFields = keys.length + aggs.length;
     if (forwardCache == null) {
-      forwardCache = new Object[totalFields];
+      forwardCache = new Object[outputKeyLength + aggs.length];
    }
-    for (int i = 0; i < keys.length; i++) {
+    for (int i = 0; i < outputKeyLength; i++) {
       forwardCache[i] = keys[i];
     }
     for (int i = 0; i < aggs.length; i++) {
-      forwardCache[keys.length + i] = aggregationEvaluators[i].evaluate(aggs[i]);
+      forwardCache[outputKeyLength + i] = aggregationEvaluators[i].evaluate(aggs[i]);
     }
 
     forward(forwardCache, outputObjInspector);
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
index 90b4b12..73c1342 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
@@ -70,6 +70,7 @@
    * Key vector expressions.
    */
   private VectorExpression[] keyExpressions;
+  private int outputKeyLength;
 
   private boolean isVectorOutput;
 
@@ -768,9 +769,15 @@ protected void initializeOp(Configuration hconf) throws HiveException {
     List<ExprNodeDesc> keysDesc = conf.getKeys();
     try {
-      keyOutputWriters = new VectorExpressionWriter[keyExpressions.length];
+      List<String> outputFieldNames = conf.getOutputColumnNames();
+
+      outputKeyLength = keyExpressions.length;
+      if (outputFieldNames.size() != outputKeyLength + aggregators.length) {
+        outputKeyLength--;
+      }
+      keyOutputWriters = new VectorExpressionWriter[outputKeyLength];
 
-      for(int i = 0; i < keyExpressions.length; ++i) {
+      for(int i = 0; i < outputKeyLength; ++i) {
         keyOutputWriters[i] = VectorExpressionWriterFactory.
            genVectorExpressionWritable(keysDesc.get(i));
         objectInspectors.add(keyOutputWriters[i].getObjectInspector());
       }
@@ -788,7 +795,6 @@ protected void initializeOp(Configuration hconf) throws HiveException {
         aggregationBatchInfo.compileAggregationBatchInfo(aggregators);
       }
       LOG.warn("VectorGroupByOperator is vector output " + isVectorOutput);
-      List<String> outputFieldNames = conf.getOutputColumnNames();
       outputObjInspector = ObjectInspectorFactory.getStandardStructObjectInspector(
           outputFieldNames, objectInspectors);
       if (isVectorOutput) {
@@ -807,9 +813,9 @@ protected void initializeOp(Configuration hconf) throws HiveException {
 
     initializeChildren(hconf);
 
-    forwardCache = new Object[keyExpressions.length + aggregators.length];
+    forwardCache = new Object[outputKeyLength + aggregators.length];
 
-    if (keyExpressions.length == 0) {
+    if (outputKeyLength == 0) {
       processingMode = this.new ProcessingModeGlobalAggregate();
     } else if (conf.getVectorDesc().isVectorGroupBatches()) {
       // Sorted GroupBy of vector batches where an individual batch has the same group key (e.g. reduce).
@@ -872,7 +878,7 @@ private void writeSingleRow(VectorHashKeyWrapper kw, VectorAggregationBufferRow
     int fi = 0;
     if (!isVectorOutput) {
       // Output row.
-      for (int i = 0; i < keyExpressions.length; ++i) {
+      for (int i = 0; i < outputKeyLength; ++i) {
         forwardCache[fi++] = keyWrappersBatch.getWritableKeyValue (
             kw, i, keyOutputWriters[i]);
       }
@@ -886,7 +892,7 @@ private void writeSingleRow(VectorHashKeyWrapper kw, VectorAggregationBufferRow
       forward(forwardCache, outputObjInspector);
     } else {
       // Output keys and aggregates into the output batch.
-      for (int i = 0; i < keyExpressions.length; ++i) {
+      for (int i = 0; i < outputKeyLength; ++i) {
         vectorColumnAssign[fi++].assignObjectValue(keyWrappersBatch.getWritableKeyValue (
             kw, i, keyOutputWriters[i]), outputBatch.size);
       }
@@ -910,7 +916,7 @@ private void writeSingleRow(VectorHashKeyWrapper kw, VectorAggregationBufferRow
    */
   private void writeGroupRow(VectorAggregationBufferRow agg, DataOutputBuffer buffer)
       throws HiveException {
-    int fi = keyExpressions.length; // Start after group keys.
+    int fi = outputKeyLength; // Start after group keys.
     for (int i = 0; i < aggregators.length; ++i) {
       vectorColumnAssign[fi++].assignObjectValue(aggregators[i].evaluateOutput(
           agg.getAggregationBuffer(i)), outputBatch.size);
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java
index afd1738..abf32f1 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java
@@ -141,6 +141,17 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
           colLists = Utilities.mergeUniqElems(colLists, param.getCols());
         }
       }
+      int groupingSetPosition = conf.getGroupingSetPosition();
+      if (groupingSetPosition >= 0) {
+        List<String> cols = cppCtx.genColLists(op);
+        String groupingColumn = conf.getOutputColumnNames().get(groupingSetPosition);
+        if (!cols.contains(groupingColumn)) {
+          conf.getOutputColumnNames().remove(groupingSetPosition);
+          if (op.getSchema() != null) {
+            op.getSchema().getSignature().remove(groupingSetPosition);
+          }
+        }
+      }
 
       cppCtx.getPrunedColLists().put(op, colLists);
       return null;
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index cea86df..df7a0a2 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -2642,7 +2642,7 @@ private Operator genNotNullFilterForJoinSourcePlan(QB qb, Operator input,
 
   @SuppressWarnings("nls")
   // TODO: make aliases unique, otherwise needless rewriting takes place
-  Integer genColListRegex(String colRegex, String tabAlias, ASTNode sel,
+  Integer genColListRegex(String colRegex, String tabAlias, ASTNode sel,
     ArrayList<ExprNodeDesc> col_list, HashSet<ColumnInfo> excludeCols, RowResolver input,
     RowResolver colSrcRR, Integer pos, RowResolver output, List<String> aliases,
     boolean ensureUniqueCols) throws SemanticException {
@@ -4063,10 +4063,11 @@ private Operator genGroupByPlanGroupByOperator1(QBParseInfo parseInfo,
     }
 
     // This is only needed if a new grouping set key is being created
-    int groupingSetsPosition = 0;
+    int groupingSetsPosition = -1;
 
     // For grouping sets, add a dummy grouping key
     if (groupingSetsPresent) {
+      groupingSetsPosition = groupByKeys.size();
       // Consider the query: select a,b, count(1) from T group by a,b with cube;
       // where it is being executed in a single map-reduce job
       // The plan is TableScan -> GroupBy1 -> ReduceSink -> GroupBy2 -> FileSink
@@ -4081,7 +4082,6 @@ private Operator genGroupByPlanGroupByOperator1(QBParseInfo parseInfo,
           colExprMap);
     } else {
-      groupingSetsPosition = groupByKeys.size();
       // The grouping set has not yet been processed. Create a new grouping key
       // Consider the query: select a,b, count(1) from T group by a,b with cube;
       // where it is being executed in 2 map-reduce jobs
@@ -4297,7 +4297,7 @@ private Operator genGroupByPlanMapGroupByOperator(QB qb,
     }
 
     // The grouping set key is present after the grouping keys, before the distinct keys
-    int groupingSetsPosition = groupByKeys.size();
+    int groupingSetsPosition = -1;
 
     // For grouping sets, add a dummy grouping key
     // This dummy key needs to be added as a reduce key
@@ -4309,6 +4309,7 @@ private Operator genGroupByPlanMapGroupByOperator(QB qb,
     // This function is called for GroupBy1 to create an additional grouping key
     // for the grouping set (corresponding to the rollup).
     if (groupingSetsPresent) {
+      groupingSetsPosition = groupByKeys.size();
       createNewGroupingKey(groupByKeys,
           outputColumnNames,
           groupByOutputRowResolver,
@@ -4865,8 +4866,10 @@ private Operator genGroupByPlanGroupByOperator2MR(QBParseInfo parseInfo,
       colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
     }
 
+    int groupingSetsPosition = -1;
     // For grouping sets, add a dummy grouping key
     if (groupingSetsPresent) {
+      groupingSetsPosition = groupByKeys.size();
       addGroupingSetKey(
           groupByKeys,
           groupByInputRowResolver2,
@@ -4922,7 +4925,8 @@ private Operator genGroupByPlanGroupByOperator2MR(QBParseInfo parseInfo,
     Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(
         new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations,
-            false, groupByMemoryUsage, memoryThreshold, null, false, 0, containsDistinctAggr),
+            false, groupByMemoryUsage, memoryThreshold, null, false,
+            groupingSetsPosition, containsDistinctAggr),
         new RowSchema(groupByOutputRowResolver2.getColumnInfos()),
         reduceSinkOperatorInfo2), groupByOutputRowResolver2);
     op.setColumnExprMap(colExprMap);
diff --git ql/src/test/queries/clientpositive/groupby_grouping_window.q ql/src/test/queries/clientpositive/groupby_grouping_window.q
new file mode 100644
index 0000000..b456074
--- /dev/null
+++ ql/src/test/queries/clientpositive/groupby_grouping_window.q
@@ -0,0 +1,15 @@
+create table t(category int, live int, comments int);
+insert into table t select key, 0, 2 from src tablesample(3 rows);
+
+explain
+select category, max(live) live, max(comments) comments, rank() OVER (PARTITION BY category ORDER BY comments) rank1
+FROM t
+GROUP BY category
+GROUPING SETS ((), (category))
+HAVING max(comments) > 0;
+
+select category, max(live) live, max(comments) comments, rank() OVER (PARTITION BY category ORDER BY comments) rank1
+FROM t
+GROUP BY category
+GROUPING SETS ((), (category))
+HAVING max(comments) > 0;
diff --git ql/src/test/results/clientpositive/annotate_stats_groupby.q.out ql/src/test/results/clientpositive/annotate_stats_groupby.q.out
index 89dd1de..875a2da 100644
--- ql/src/test/results/clientpositive/annotate_stats_groupby.q.out
+++ ql/src/test/results/clientpositive/annotate_stats_groupby.q.out
@@ -389,8 +389,8 @@ STAGE PLANS:
         Group By Operator
           keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
           mode: mergepartial
-          outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 32 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 32 Data size: 2880 Basic stats: COMPLETE Column stats: COMPLETE
           Select Operator
             expressions: _col0 (type: string), _col1 (type: int)
             outputColumnNames: _col0, _col1
@@ -446,8 +446,8 @@ STAGE PLANS:
         Group By Operator
           keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
           mode: mergepartial
-          outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 24 Data size: 2160 Basic stats: COMPLETE Column stats: COMPLETE
           Select Operator
             expressions: _col0 (type: string), _col1 (type: int)
             outputColumnNames: _col0, _col1
@@ -503,8 +503,8 @@ STAGE PLANS:
         Group By Operator
           keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
           mode: mergepartial
-          outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
           Select Operator
             expressions: _col0 (type: string), _col1 (type: int)
             outputColumnNames: _col0, _col1
@@ -560,8 +560,8 @@ STAGE PLANS:
         Group By Operator
           keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
           mode: mergepartial
-          outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 16 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE
           Select Operator
             expressions: _col0 (type: string), _col1 (type: int)
             outputColumnNames: _col0, _col1
@@ -617,8 +617,8 @@ STAGE PLANS:
         Group By Operator
           keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
           mode: mergepartial
-          outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 24 Data size: 2160 Basic stats: COMPLETE Column stats: COMPLETE
           Select Operator
             expressions: _col0 (type: string), _col1 (type: int)
             outputColumnNames: _col0, _col1
@@ -674,8 +674,8 @@ STAGE PLANS:
         Group By Operator
           keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
           mode: mergepartial
-          outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 32 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 32 Data size: 2880 Basic stats: COMPLETE Column stats: COMPLETE
           Select Operator
             expressions: _col0 (type: string), _col1 (type: int)
             outputColumnNames: _col0, _col1
@@ -792,8 +792,8 @@ STAGE PLANS:
         Group By Operator
           keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
           mode: mergepartial
-          outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 16 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE
           Select Operator
             expressions: _col0 (type: string), _col1 (type: int)
             outputColumnNames: _col0, _col1
@@ -908,7 +908,7 @@ STAGE PLANS:
         Group By Operator
           keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
           mode: mergepartial
-          outputColumnNames: _col0, _col1, _col2
+          outputColumnNames: _col0, _col1
           Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: _col0 (type: string), _col1 (type: int)
@@ -965,7 +965,7 @@ STAGE PLANS:
         Group By Operator
           keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
           mode: mergepartial
-          outputColumnNames: _col0, _col1, _col2
+          outputColumnNames: _col0, _col1
           Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: _col0 (type: string), _col1 (type: int)
@@ -1022,7 +1022,7 @@ STAGE PLANS:
         Group By Operator
           keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
           mode: mergepartial
-          outputColumnNames: _col0, _col1, _col2
+          outputColumnNames: _col0, _col1
           Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: _col0 (type: string), _col1 (type: int)
@@ -1079,7 +1079,7 @@ STAGE PLANS:
         Group By Operator
           keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
           mode: mergepartial
-          outputColumnNames: _col0, _col1, _col2
+          outputColumnNames: _col0, _col1
           Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: _col0 (type: string), _col1 (type: int)
@@ -1136,7 +1136,7 @@ STAGE PLANS:
         Group By Operator
           keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
           mode: mergepartial
-          outputColumnNames: _col0, _col1, _col2
+          outputColumnNames: _col0, _col1
           Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: _col0 (type: string), _col1 (type: int)
@@ -1193,7 +1193,7 @@ STAGE PLANS:
         Group By Operator
           keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
           mode: mergepartial
-          outputColumnNames: _col0, _col1, _col2
+          outputColumnNames: _col0, _col1
           Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: _col0 (type: string), _col1 (type: int)
@@ -1307,7 +1307,7 @@ STAGE PLANS:
         Group By Operator
           keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
           mode: mergepartial
-          outputColumnNames: _col0, _col1, _col2
+          outputColumnNames: _col0, _col1
           Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: _col0 (type: string), _col1 (type: int)
diff --git ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out
index c3bf0d8..b116e9e 100644
--- ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out
+++ ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out
@@ -164,7 +164,7 @@ STAGE PLANS:
         Group By Operator
           keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
           mode: mergepartial
-          outputColumnNames: _col0, _col1, _col2
+          outputColumnNames: _col0, _col1
           Statistics: Num rows: 40 Data size: 400 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: _col0 (type: string), _col1 (type: string)
@@ -341,8 +341,8 @@ STAGE PLANS:
         Group By Operator
           keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
           mode: mergepartial
-          outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 8 Data size: 2064 Basic stats: COMPLETE Column stats: COMPLETE
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 8 Data size: 1384 Basic stats: COMPLETE Column stats: COMPLETE
           Select Operator
             expressions: _col0 (type: string), _col1 (type: string)
             outputColumnNames: _col0, _col1
@@ -455,8 +455,8 @@ STAGE PLANS:
         Group By Operator
           keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
           mode: mergepartial
-          outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 8 Data size: 2064 Basic stats: COMPLETE Column stats: COMPLETE
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 8 Data size: 1384 Basic stats: COMPLETE Column stats: COMPLETE
           Select Operator
             expressions: _col0 (type: string), _col1 (type: string)
             outputColumnNames: _col0, _col1
diff --git ql/src/test/results/clientpositive/groupby_cube1.q.out ql/src/test/results/clientpositive/groupby_cube1.q.out
index 4f44d4f..37f2519 100644
--- ql/src/test/results/clientpositive/groupby_cube1.q.out
+++ ql/src/test/results/clientpositive/groupby_cube1.q.out
@@ -56,7 +56,7 @@ STAGE PLANS:
          aggregations: count(VALUE._col0)
          keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
          mode: mergepartial
-         outputColumnNames: _col0, _col1, _col2, _col3
+         outputColumnNames: _col0, _col1, _col3
          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
          Select Operator
            expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
@@ -223,7 +223,7 @@ STAGE PLANS:
          aggregations: count(DISTINCT KEY._col2:0._col0)
          keys: KEY._col0 (type: string), KEY._col1 (type: string)
          mode: mergepartial
-         outputColumnNames: _col0, _col1, _col2
+         outputColumnNames: _col0, _col2
          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
          Select Operator
            expressions: _col0 (type: string), _col2 (type: bigint)
@@ -320,7 +320,7 @@ STAGE PLANS:
          aggregations: count(VALUE._col0)
          keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
          mode: final
-         outputColumnNames: _col0, _col1, _col2, _col3
+         outputColumnNames: _col0, _col1, _col3
          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
          Select Operator
            expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
@@ -403,7 +403,7 @@ STAGE PLANS:
          aggregations: count(DISTINCT KEY._col2:0._col0)
          keys: KEY._col0 (type: string), KEY._col1 (type: string)
          mode: complete
-         outputColumnNames: _col0, _col1, _col2
+         outputColumnNames: _col0, _col2
          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
          Select Operator
            expressions: _col0 (type: string), _col2 (type: bigint)
@@ -541,7 +541,7 @@ STAGE PLANS:
          aggregations: count(VALUE._col0)
          keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
          mode: final
-         outputColumnNames: _col0, _col1, _col2, _col3
+         outputColumnNames: _col0, _col1, _col3
          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
          Select Operator
            expressions: _col0 (type: string), _col1 (type: string), UDFToInteger(_col3) (type: int)
@@ -608,7 +608,7 @@ STAGE PLANS:
          aggregations: sum(VALUE._col0)
          keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
          mode: final
-         outputColumnNames: _col0, _col1, _col2, _col3
+         outputColumnNames: _col0, _col1, _col3
          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
          Select Operator
            expressions: _col0 (type: string), _col1 (type: string), UDFToInteger(_col3) (type: int)
diff --git ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out
index 0a21dbe..e5dc022 100644
--- ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out
+++ ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out
@@ -79,7 +79,7 @@ STAGE PLANS:
          aggregations: count(VALUE._col0)
          keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
          mode: final
-         outputColumnNames: _col0, _col1, _col2, _col3
+         outputColumnNames: _col0, _col1, _col3
          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
          Select Operator
            expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
@@ -185,7 +185,7 @@ STAGE PLANS:
          aggregations: sum(VALUE._col0)
          keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
          mode: final
-         outputColumnNames: _col0, _col1, _col2, _col3
+         outputColumnNames: _col0, _col1, _col3
          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
          Select Operator
            expressions: _col0 (type: string), _col1 (type: string), _col3 (type: double)
@@ -313,7 +313,7 @@ STAGE PLANS:
          aggregations: sum(VALUE._col0)
          keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
          mode: final
-         outputColumnNames: _col0, _col1, _col2, _col3
+         outputColumnNames: _col0, _col1, _col3
          Statistics: Num rows: 12 Data size: 84 Basic stats: COMPLETE Column stats: NONE
          Select Operator
            expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
diff --git ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out
index 3597609..7a07181 100644
--- ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out
+++ ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out
@@ -74,7 +74,7 @@ STAGE PLANS:
          aggregations: avg(VALUE._col0), count(VALUE._col1)
          keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
          mode: mergepartial
-         outputColumnNames: _col0, _col1, _col2, _col3, _col4
+         outputColumnNames: _col0, _col1, _col3, _col4
          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
          Select Operator
            expressions: _col0 (type: string), _col1 (type: string), _col3 (type: double), _col4 (type: bigint)
@@ -185,7 +185,7 @@ STAGE PLANS:
          aggregations: avg(VALUE._col0), count(VALUE._col1)
          keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
          mode: final
-         outputColumnNames: _col0, _col1, _col2, _col3, _col4
+         outputColumnNames: _col0, _col1, _col3, _col4
          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
          Select Operator
            expressions: _col0 (type: string), _col1 (type: string), _col3 (type: double), _col4 (type: bigint)
diff --git ql/src/test/results/clientpositive/groupby_grouping_sets4.q.out ql/src/test/results/clientpositive/groupby_grouping_sets4.q.out
index d1be46d..9927429 100644
--- ql/src/test/results/clientpositive/groupby_grouping_sets4.q.out
+++ ql/src/test/results/clientpositive/groupby_grouping_sets4.q.out
@@ -71,7 +71,7 @@ STAGE PLANS:
          aggregations: count(VALUE._col0)
          keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
          mode: mergepartial
-         outputColumnNames: _col0, _col1, _col2, _col3
+         outputColumnNames: _col0, _col1, _col3
          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
          Select Operator
            expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
@@ -148,7 +148,7 @@ STAGE PLANS:
          aggregations: count(VALUE._col0)
          keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
          mode: mergepartial
-         outputColumnNames: _col0, _col1, _col2, _col3
+         outputColumnNames: _col0, _col1, _col3
          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
          Select Operator
            expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
@@ -273,7 +273,7 @@ STAGE PLANS:
          aggregations: count(VALUE._col0)
          keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
          mode: final
-         outputColumnNames: _col0, _col1, _col2, _col3
+         outputColumnNames: _col0, _col1, _col3
          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
          Select Operator
            expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
@@ -374,7 +374,7 @@ STAGE PLANS:
          aggregations: count(VALUE._col0)
          keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
          mode: final
-         outputColumnNames: _col0, _col1, _col2, _col3
+         outputColumnNames: _col0, _col1, _col3
          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
          Select Operator
            expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
diff --git ql/src/test/results/clientpositive/groupby_grouping_sets5.q.out ql/src/test/results/clientpositive/groupby_grouping_sets5.q.out
index 6d11add..d05a120 100644
--- ql/src/test/results/clientpositive/groupby_grouping_sets5.q.out
+++ ql/src/test/results/clientpositive/groupby_grouping_sets5.q.out
@@ -88,7 +88,7 @@ STAGE PLANS:
          aggregations: count(VALUE._col0)
          keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
          mode: mergepartial
-         outputColumnNames: _col0, _col1, _col2, _col3
+         outputColumnNames: _col0, _col1, _col3
          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
          Select Operator
            expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
@@ -228,7 +228,7 @@ STAGE PLANS:
          aggregations: count(VALUE._col0)
          keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
          mode: final
-         outputColumnNames: _col0, _col1, _col2, _col3
+         outputColumnNames: _col0, _col1, _col3
          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
          Select Operator
            expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
diff --git ql/src/test/results/clientpositive/groupby_grouping_sets6.q.out ql/src/test/results/clientpositive/groupby_grouping_sets6.q.out
index d2ff112..da37ea3 100644
--- ql/src/test/results/clientpositive/groupby_grouping_sets6.q.out
+++ ql/src/test/results/clientpositive/groupby_grouping_sets6.q.out
@@ -55,7 +55,7 @@ STAGE PLANS:
        Group By Operator
          keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
          mode: mergepartial
-         outputColumnNames: _col0, _col1, _col2
+         outputColumnNames: _col0, _col1
          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
          Select Operator
            expressions: _col0 (type: string), _col1 (type: string)
@@ -140,7 +140,7 @@ STAGE PLANS:
        Group By Operator
          keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
          mode: mergepartial
-         outputColumnNames: _col0, _col1, _col2
+         outputColumnNames: _col0, _col1
          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
          Select Operator
            expressions: _col0 (type: string), _col1 (type: string)
diff --git ql/src/test/results/clientpositive/groupby_rollup1.q.out ql/src/test/results/clientpositive/groupby_rollup1.q.out
index 0108ce0..bf87012 100644
--- ql/src/test/results/clientpositive/groupby_rollup1.q.out
+++ ql/src/test/results/clientpositive/groupby_rollup1.q.out
@@ -56,7 +56,7 @@ STAGE PLANS:
          aggregations: count(VALUE._col0)
          keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
          mode: mergepartial
-         outputColumnNames: _col0, _col1, _col2, _col3
+         outputColumnNames: _col0, _col1, _col3
          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
          Select Operator
            expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
@@ -133,7 +133,7 @@ STAGE PLANS:
          aggregations: count(DISTINCT KEY._col2:0._col0)
          keys: KEY._col0 (type: string), KEY._col1 (type: string)
          mode: mergepartial
-         outputColumnNames: _col0, _col1, _col2
+         outputColumnNames: _col0, _col2
          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
          Select Operator
            expressions: _col0 (type: string), _col2 (type: bigint)
@@ -230,7 +230,7 @@ STAGE PLANS:
          aggregations: count(VALUE._col0)
          keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
          mode: final
-         outputColumnNames: _col0, _col1, _col2, _col3
+         outputColumnNames: _col0, _col1, _col3
          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
          Select Operator
            expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
@@ -307,7 +307,7 @@ STAGE PLANS:
          aggregations: count(DISTINCT KEY._col2:0._col0)
          keys: KEY._col0 (type: string), KEY._col1 (type: string)
          mode: complete
-         outputColumnNames: _col0, _col1, _col2
+         outputColumnNames: _col0, _col2
          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
          Select Operator
            expressions: _col0 (type: string), _col2 (type: bigint)
@@ -445,7 +445,7 @@ STAGE PLANS:
          aggregations: count(VALUE._col0)
          keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
          mode: final
-         outputColumnNames: _col0, _col1, _col2, _col3
+         outputColumnNames: _col0, _col1, _col3
          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
          Select Operator
            expressions: _col0 (type: string), _col1 (type: string), UDFToInteger(_col3) (type: int)
@@ -512,7 +512,7 @@ STAGE PLANS:
          aggregations: sum(VALUE._col0)
          keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
          mode: final
-         outputColumnNames: _col0, _col1, _col2, _col3
+         outputColumnNames: _col0, _col1, _col3
          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
          Select Operator
            expressions: _col0 (type: string), _col1 (type: string), UDFToInteger(_col3) (type: int)
diff --git ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out
index 301b90c..261631d 100644
--- ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out
+++ ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out
@@ -63,7 +63,7 @@ STAGE PLANS:
          aggregations: count(VALUE._col0)
          keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
          mode: mergepartial
-         outputColumnNames: _col0, _col1, _col2, _col3
+         outputColumnNames: _col0, _col1, _col3
          Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE
          Select Operator
            expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
@@ -143,9 +143,9 @@ SerDe Library:         org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 InputFormat:           org.apache.hadoop.mapred.TextInputFormat
 OutputFormat:          org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 Compressed:            No
-Num Buckets:           -1
-Bucket Columns:        []
-Sort Columns:          []
+Num Buckets:           1
+Bucket Columns:        [key, value, agg]
+Sort Columns:          [Order(col:key, order:1), Order(col:value, order:1), Order(col:agg, order:1)]
 Storage Desc Params:
 	serialization.format	1
 PREHOOK: query: -- Test rollup, should be bucketed and sorted on key, value, grouping_key
@@ -250,7 +250,7 @@ STAGE PLANS:
          aggregations: count(VALUE._col0)
          keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
          mode: mergepartial
-         outputColumnNames: _col0, _col1, _col2, _col3
+         outputColumnNames: _col0, _col1, _col3
          Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
          Select Operator
            expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
@@ -330,9 +330,9 @@ SerDe Library:         org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 InputFormat:           org.apache.hadoop.mapred.TextInputFormat
 OutputFormat:          org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 Compressed:            No
-Num Buckets:           -1
-Bucket Columns:        []
-Sort Columns:          []
+Num Buckets:           1
+Bucket Columns:        [key, value, agg]
+Sort Columns:          [Order(col:key, order:1), Order(col:value, order:1), Order(col:agg, order:1)]
 Storage Desc Params:
 	serialization.format	1
 PREHOOK: query: -- Test cube, should be bucketed and sorted on key, value, grouping_key
@@ -437,7 +437,7 @@ STAGE PLANS:
          aggregations: count(VALUE._col0)
          keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
          mode: mergepartial
-         outputColumnNames: _col0, _col1, _col2, _col3
+         outputColumnNames: _col0, _col1, _col3
          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
          Select Operator
            expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
@@ -517,9 +517,9 @@ SerDe Library:         org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 InputFormat:           org.apache.hadoop.mapred.TextInputFormat
 OutputFormat:          org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 Compressed:            No
-Num Buckets:           -1
-Bucket Columns:        []
-Sort Columns:          []
+Num Buckets:           1
+Bucket Columns:        [key, value, agg]
+Sort Columns:          [Order(col:key, order:1), Order(col:value, order:1), Order(col:agg, order:1)]
 Storage Desc Params:
 	serialization.format	1
 PREHOOK: query: -- Test grouping sets, should be bucketed and sorted on key, value, grouping_key
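
Note on the mechanism: both GroupByOperator and VectorGroupByOperator derive outputKeyLength with the same width check. If the operator's output column list is one entry shorter than keys plus aggregations, then ColumnPrunerProcFactory (above) has dropped the unused grouping-set id column, which is always the last key. A minimal, self-contained sketch of that rule; the class and method names below are illustrative, not Hive APIs:

public final class OutputKeyLengthSketch {
  /** keys counts the grouping-set id as one of the group-by keys. */
  static int outputKeyLength(int keys, int aggregations, int outputColumns) {
    // A schema one column narrower than keys + aggregations means the
    // grouping-set id was pruned, so the last key is excluded from output.
    return outputColumns == keys + aggregations ? keys : keys - 1;
  }

  public static void main(String[] args) {
    // For the new test query, GROUP BY category GROUPING SETS ((), (category))
    // yields keys {category, grouping id} and aggregations {max(live), max(comments)}.
    System.out.println(outputKeyLength(2, 2, 4)); // 2: grouping id still in the schema
    System.out.println(outputKeyLength(2, 2, 3)); // 1: grouping id pruned
  }
}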