diff --git ql/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/ql/plan/api/OperatorType.java ql/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/ql/plan/api/OperatorType.java index aa094ee..427e151 100644 --- ql/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/ql/plan/api/OperatorType.java +++ ql/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/ql/plan/api/OperatorType.java @@ -32,7 +32,8 @@ HASHTABLEDUMMY(17), PTF(18), MUX(19), - DEMUX(20); + DEMUX(20), + GROUPMULTIPLEX(21); private final int value; diff --git ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java index 8ae1c73..b9e5fdb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java +++ ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java @@ -18,11 +18,6 @@ package org.apache.hadoop.hive.ql; -import org.antlr.runtime.tree.Tree; -import org.apache.hadoop.hive.ql.metadata.HiveUtils; -import org.apache.hadoop.hive.ql.parse.ASTNode; -import org.apache.hadoop.hive.ql.parse.ASTNodeOrigin; - import java.text.MessageFormat; import java.util.HashMap; import java.util.Map; @@ -285,9 +280,6 @@ ALTER_TABLE_NOT_ALLOWED_RENAME_SKEWED_COLUMN(10207, " is a skewed column. It's not allowed to rename skewed column" + " or change skewed column type."), - HIVE_GROUPING_SETS_AGGR_NOMAPAGGR(10209, - "Grouping sets aggregations (with rollups or cubes) are not allowed if map-side " + - " aggregation is turned off. 
Set hive.map.aggr=true if you want to use grouping sets"), HIVE_GROUPING_SETS_AGGR_EXPRESSION_INVALID(10210, "Grouping sets aggregations (with rollups or cubes) are not allowed if aggregation function " + "parameters overlap with the aggregation functions columns"), diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/GroupMultiplexOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/GroupMultiplexOperator.java new file mode 100644 index 0000000..792f940 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/GroupMultiplexOperator.java @@ -0,0 +1,152 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; + +import javolution.util.FastBitSet; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.GroupMultiplexDesc; +import org.apache.hadoop.hive.ql.plan.api.OperatorType; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; + +import com.google.common.collect.Lists; + +/** + * Group Multiplex Operator: forwards each input row once per grouping set, nulling keys outside the set. + */ +public class GroupMultiplexOperator extends Operator + implements Serializable { + + private static final Log LOG = LogFactory.getLog(GroupMultiplexOperator.class.getName()); + private static final long serialVersionUID = 1L; + + private ExprNodeEvaluator[] keyFields; + private ExprNodeEvaluator[] valueFields; + private List outputFieldInspectors; + + private int groupingSetsPosition; + private List groupingSets; + private List groupingSetsBitSet; + + private List newKeysGroupingSets; + + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + ObjectInspector rowInspector = inputObjInspectors[0]; + outputFieldInspectors = Lists.newArrayList(); + + keyFields = new ExprNodeEvaluator[conf.getKeys().size()]; + for (int i = 0; i < keyFields.length; i++) { + keyFields[i] = ExprNodeEvaluatorFactory.get(conf.getKeys().get(i)); + outputFieldInspectors.add(keyFields[i].initialize(rowInspector)); + } + + groupingSets = conf.getListGroupingSets(); + groupingSetsPosition = conf.getGroupingSetPosition(); + groupingSetsBitSet = new ArrayList(); + newKeysGroupingSets = new ArrayList(); + for (Integer groupingSet : 
groupingSets) { + // Create the mapping corresponding to the grouping set + ExprNodeEvaluator groupingSetValueEvaluator = ExprNodeEvaluatorFactory + .get(new ExprNodeConstantDesc(String.valueOf(groupingSet))); + groupingSetsBitSet.add(GroupByOperator.groupingSet2BitSet(groupingSet)); + newKeysGroupingSets.add(groupingSetValueEvaluator.evaluate(null)); + } + + outputFieldInspectors + .add(ObjectInspectorUtils.getStandardObjectInspector(conf.getKeys() + .get(conf.getKeys().size() - 1).getWritableObjectInspector())); + + valueFields = new ExprNodeEvaluator[conf.getValues().size()]; + for (int i = 0; i < valueFields.length; i++) { + valueFields[i] = ExprNodeEvaluatorFactory.get(conf.getValues().get(i)); + outputFieldInspectors.add(valueFields[i].initialize(rowInspector)); + } + + this.outputObjInspector = ObjectInspectorFactory + .getStandardStructObjectInspector(conf.getOutputColumnNames(), + outputFieldInspectors); + + initializeChildren(hconf); + } + + // forward in groupByKey1,,groupByKeyN,groupingId,value1,,,valueM + @Override + public void processOp(Object row, int tag) throws HiveException { + + Object[] inputKeys = new Object[this.keyFields.length]; + for (int i = 0; i < inputKeys.length; i++) { + inputKeys[i] = this.keyFields[i].evaluate(row); + } + Object[] inputValues = new Object[this.valueFields.length]; + for (int i = 0; i < inputValues.length; i++) { + inputValues[i] = this.valueFields[i].evaluate(row); + } + + Object[] output = new Object[this.colExprMap.size()]; + for (int groupingSetPos = 0; groupingSetPos < groupingSets.size(); groupingSetPos++) { + FastBitSet bitset = groupingSetsBitSet.get(groupingSetPos); + int keyPos = bitset.nextSetBit(0); + for (int i = 0; i < output.length; i++) { + if (i < this.keyFields.length - 1) { + // set group by keys + if (keyPos == i) { + output[i] = inputKeys[i]; + keyPos = bitset.nextSetBit(keyPos + 1); + } else { + output[i] = null; + } + } else if (i == this.groupingSetsPosition - 1) { + // set grouping id + 
output[i] = newKeysGroupingSets.get(groupingSetPos); + } else { + // input fields (for aggregations in GroupByOperator) + output[i] = inputValues[i - this.keyFields.length]; + } + } + forward(output, this.outputObjInspector); + } + } + + @Override + public String getName() { + return getOperatorName(); + } + + static public String getOperatorName() { + return "GMX"; + } + + @Override + public OperatorType getType() { + return OperatorType.GROUPMULTIPLEX; + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java index 5d41fa1..7a5abf1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java @@ -28,8 +28,8 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorLimitOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorReduceSinkOperator; -import org.apache.hadoop.hive.ql.exec.vector.VectorSelectOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorSMBMapJoinOperator; +import org.apache.hadoop.hive.ql.exec.vector.VectorSelectOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.CollectDesc; @@ -41,6 +41,7 @@ import org.apache.hadoop.hive.ql.plan.FilterDesc; import org.apache.hadoop.hive.ql.plan.ForwardDesc; import org.apache.hadoop.hive.ql.plan.GroupByDesc; +import org.apache.hadoop.hive.ql.plan.GroupMultiplexDesc; import org.apache.hadoop.hive.ql.plan.HashTableDummyDesc; import org.apache.hadoop.hive.ql.plan.HashTableSinkDesc; import org.apache.hadoop.hive.ql.plan.JoinDesc; @@ -93,6 +94,7 @@ public OpTuple(Class descClass, Class> opClass) { opvec.add(new OpTuple(ReduceSinkDesc.class, ReduceSinkOperator.class)); opvec.add(new OpTuple(ExtractDesc.class, ExtractOperator.class)); opvec.add(new 
OpTuple(GroupByDesc.class, GroupByOperator.class)); + opvec.add(new OpTuple(GroupMultiplexDesc.class, GroupMultiplexOperator.class)); opvec.add(new OpTuple(JoinDesc.class, JoinOperator.class)); opvec.add(new OpTuple(MapJoinDesc.class, MapJoinOperator.class)); opvec.add(new OpTuple(SMBJoinDesc.class, SMBMapJoinOperator.class)); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java index 5fad971..587a75a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java @@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.exec.FileSinkOperator; import org.apache.hadoop.hive.ql.exec.FilterOperator; import org.apache.hadoop.hive.ql.exec.GroupByOperator; +import org.apache.hadoop.hive.ql.exec.GroupMultiplexOperator; import org.apache.hadoop.hive.ql.exec.LateralViewForwardOperator; import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; @@ -117,6 +118,9 @@ public ParseContext transform(ParseContext pactx) throws SemanticException { opRules.put(new RuleRegExp("R10", PTFOperator.getOperatorName() + "%"), ColumnPrunerProcFactory.getPTFProc()); + opRules.put(new RuleRegExp("R11", + GroupMultiplexOperator.getOperatorName() + "%"), + ColumnPrunerProcFactory.getGroupMultiplexProc()); // The dispatcher fires the processor corresponding to the closest matching // rule and passes the context along Dispatcher disp = new DefaultRuleDispatcher(ColumnPrunerProcFactory diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java index 2a8fb2b..d637055 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java @@ -19,6 +19,7 @@ package 
org.apache.hadoop.hive.ql.optimizer; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; @@ -34,6 +35,7 @@ import org.apache.hadoop.hive.ql.exec.CommonJoinOperator; import org.apache.hadoop.hive.ql.exec.FilterOperator; import org.apache.hadoop.hive.ql.exec.GroupByOperator; +import org.apache.hadoop.hive.ql.exec.GroupMultiplexOperator; import org.apache.hadoop.hive.ql.exec.JoinOperator; import org.apache.hadoop.hive.ql.exec.LateralViewForwardOperator; import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator; @@ -152,6 +154,32 @@ public static ColumnPrunerGroupByProc getGroupByProc() { } /** + * Node Processor for Column Pruning on Group Multiplex Operators. + */ + public static class ColumnPrunerGroupMultiplexProc implements NodeProcessor { + public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, Object... nodeOutputs) + throws SemanticException { + GroupMultiplexOperator op = (GroupMultiplexOperator) nd; + ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx; + List colLists = new ArrayList(); + Collection keys = op.getColumnExprMap().values(); + for (ExprNodeDesc key : keys) { + colLists = Utilities.mergeUniqElems(colLists, key.getCols()); + } + cppCtx.getPrunedColLists().put(op, colLists); + return null; + } + } + + /** + * Factory method to get the ColumnPrunerGroupMultiplexProc class. + * @return ColumnPrunerGroupMultiplexProc + */ + public static ColumnPrunerGroupMultiplexProc getGroupMultiplexProc() { + return new ColumnPrunerGroupMultiplexProc(); + } + + /** * - Pruning can only be done for Windowing. PTFs are black boxes, * we assume all columns are needed. 
* - add column names referenced in WindowFn args and in WindowFn expressions diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 6cdaedb..5a71325 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -75,6 +75,7 @@ import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; import org.apache.hadoop.hive.ql.exec.RowSchema; import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator; +import org.apache.hadoop.hive.ql.exec.SelectOperator; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.TaskFactory; @@ -142,6 +143,7 @@ import org.apache.hadoop.hive.ql.plan.FilterDesc.sampleDesc; import org.apache.hadoop.hive.ql.plan.ForwardDesc; import org.apache.hadoop.hive.ql.plan.GroupByDesc; +import org.apache.hadoop.hive.ql.plan.GroupMultiplexDesc; import org.apache.hadoop.hive.ql.plan.HiveOperation; import org.apache.hadoop.hive.ql.plan.JoinCondDesc; import org.apache.hadoop.hive.ql.plan.JoinDesc; @@ -4599,9 +4601,12 @@ private Operator genGroupByPlan1MR(String dest, QB qb, Operator input) numReducers = 1; } - // Grouping sets are not allowed if (!groupingSets.isEmpty()) { - throw new SemanticException(ErrorMsg.HIVE_GROUPING_SETS_AGGR_NOMAPAGGR.getMsg()); + checkExpressionsForGroupingSet(grpByExprs, + parseInfo.getDistinctFuncExprsForClause(dest), + parseInfo.getAggregationExprsForClause(dest), + opParseCtx.get(input).getRowResolver()); + input = genGroupMultiplexOperator(qb, dest, grpByExprs, input, groupingSets); } // ////// 1. Generate ReduceSinkOperator @@ -4614,7 +4619,7 @@ private Operator genGroupByPlan1MR(String dest, QB qb, Operator input) false, numReducers, false, - false); + !groupingSets.isEmpty()); // ////// 2. 
Generate GroupbyOperator Operator groupByOperatorInfo = genGroupByPlanGroupByOperator(parseInfo, @@ -4623,6 +4628,56 @@ private Operator genGroupByPlan1MR(String dest, QB qb, Operator input) return groupByOperatorInfo; } + private Operator genGroupMultiplexOperator(QB qb, String dest, List grpByExprs, + Operator inputOperatorInfo, List groupingSetKeys) throws SemanticException { + + RowResolver inputRowResolver = opParseCtx.get(inputOperatorInfo).getRowResolver(); + RowResolver outputRowResolver = new RowResolver(); + outputRowResolver.setIsExprResolver(true); + List groupByKeys = new ArrayList(); + List outputColumnNames = new ArrayList(); + Map colExprMap = new HashMap(); + + for (int i = 0; i < grpByExprs.size(); ++i) { + ASTNode grpbyExpr = grpByExprs.get(i); + ExprNodeDesc grpByExprNode = genExprNodeDesc(grpbyExpr, inputRowResolver); + groupByKeys.add(grpByExprNode); + String field = getColumnInternalName(i); + outputColumnNames.add(field); + outputRowResolver.putExpression(grpbyExpr, + new ColumnInfo(field, grpByExprNode.getTypeInfo(), "", false)); + colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1)); + } + + List outputValues = new ArrayList(); + + createNewGroupingKey(groupByKeys, outputColumnNames, outputRowResolver, colExprMap); + int groupingSetsPosition = groupByKeys.size(); + + Map aggrs = qb.getParseInfo().getAggregationExprsForClause(dest); + for (ASTNode aggr : aggrs.values()) { + for (int i = 1; i < aggr.getChildCount(); i++) { + ASTNode parameter = (ASTNode) aggr.getChild(i); + if (outputRowResolver.getExpression(parameter) == null) { + ExprNodeDesc exprDesc = genExprNodeDesc(parameter, inputRowResolver); + String field = getColumnInternalName(outputColumnNames.size()); + outputColumnNames.add(field); + outputValues.add(exprDesc); + outputRowResolver.putExpression(parameter, + new ColumnInfo(field, exprDesc.getTypeInfo(), null, false)); + colExprMap.put(field, exprDesc); + } + } + } + Operator op = + 
putOpInsertMap(OperatorFactory.getAndMakeChild( + new GroupMultiplexDesc(outputColumnNames, groupByKeys, outputValues, groupingSetKeys), + new RowSchema(outputRowResolver.getColumnInfos()), inputOperatorInfo), + outputRowResolver); + op.setColumnExprMap(colExprMap); + return op; + } + @SuppressWarnings({"nls"}) private Operator genGroupByPlan1ReduceMultiGBY(List dests, QB qb, Operator input, Map aliasToOpInfo) @@ -4839,11 +4894,10 @@ private Operator genGroupByPlan2MR(String dest, QB qb, Operator input) List grpByExprs = grpByExprsGroupingSets.getFirst(); List groupingSets = grpByExprsGroupingSets.getSecond(); - // Grouping sets are not allowed - // This restriction can be lifted in future. - // HIVE-3508 has been filed for this if (!groupingSets.isEmpty()) { - throw new SemanticException(ErrorMsg.HIVE_GROUPING_SETS_AGGR_NOMAPAGGR.getMsg()); + checkExpressionsForGroupingSet(grpByExprs, parseInfo.getDistinctFuncExprsForClause(dest), + parseInfo.getAggregationExprsForClause(dest), opParseCtx.get(input).getRowResolver()); + input = genGroupMultiplexOperator(qb, dest, grpByExprs, (SelectOperator) input, groupingSets); } // ////// 1. Generate ReduceSinkOperator @@ -4862,7 +4916,7 @@ private Operator genGroupByPlan2MR(String dest, QB qb, Operator input) false, -1, false, - false); + !groupingSets.isEmpty()); // ////// 2. Generate GroupbyOperator Map genericUDAFEvaluators = diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/GroupMultiplexDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/GroupMultiplexDesc.java new file mode 100644 index 0000000..7c9c32c --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/GroupMultiplexDesc.java @@ -0,0 +1,78 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +import java.util.List; + +/** + * GroupMultiplexDesc. + */ +@Explain(displayName = "Group Multiplex Operator") +public class GroupMultiplexDesc extends AbstractOperatorDesc { + + private List keys; + private List values; + private List listGroupingSets; + private int groupingSetPosition; + + private List outputColumnNames; + + public GroupMultiplexDesc() { + } + + public GroupMultiplexDesc( + final List outputColumnNames, + final List keys, + final List values, + final List listGroupingSets) { + this.outputColumnNames = outputColumnNames; + this.keys = keys; + this.values = values; + this.listGroupingSets = listGroupingSets; + this.groupingSetPosition = keys.size(); + } + + + @Explain(displayName = "keys") + public String getKeyString() { + return PlanUtils.getExprListString(keys); + } + + public List getKeys() { + return keys; + } + + public List getValues() { + return values; + } + + @Explain(displayName = "outputColumnNames") + public List getOutputColumnNames() { + return outputColumnNames; + } + + public List getListGroupingSets() { + return listGroupingSets; + } + + public int getGroupingSetPosition() { + return groupingSetPosition; + } + +} diff --git ql/src/test/queries/clientpositive/groupby_grouping_sets_skew_without_mapaggr.q ql/src/test/queries/clientpositive/groupby_grouping_sets_skew_without_mapaggr.q new file mode 100644 
index 0000000..88cfe12 --- /dev/null +++ ql/src/test/queries/clientpositive/groupby_grouping_sets_skew_without_mapaggr.q @@ -0,0 +1,20 @@ +set hive.map.aggr=false; +set hive.groupby.skewindata=true; +CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1; + +SELECT * FROM T1; + +SELECT a, b, count(*) from T1 group by a, b with cube; + +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()); + +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)); + +SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c); + +SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)); + +SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b); + diff --git ql/src/test/queries/clientpositive/groupby_grouping_sets_without_mapaggr.q ql/src/test/queries/clientpositive/groupby_grouping_sets_without_mapaggr.q new file mode 100644 index 0000000..0a2d719 --- /dev/null +++ ql/src/test/queries/clientpositive/groupby_grouping_sets_without_mapaggr.q @@ -0,0 +1,19 @@ +set hive.map.aggr=false; +CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1; + +SELECT * FROM T1; + +SELECT a, b, count(*) from T1 group by a, b with cube; + +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()); + +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)); + +SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c); + +SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)); + +SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b); + diff --git ql/src/test/results/clientpositive/groupby_grouping_sets_skew_without_mapaggr.q.out ql/src/test/results/clientpositive/groupby_grouping_sets_skew_without_mapaggr.q.out new file mode 100644 index 0000000..0665eb4 --- 
/dev/null +++ ql/src/test/results/clientpositive/groupby_grouping_sets_skew_without_mapaggr.q.out @@ -0,0 +1,142 @@ +PREHOOK: query: CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1 +PREHOOK: query: SELECT * FROM T1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM T1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +8 1 1 +5 2 2 +1 1 3 +2 2 4 +2 3 5 +3 2 8 +PREHOOK: query: SELECT a, b, count(*) from T1 group by a, b with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, count(*) from T1 group by a, b with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +NULL NULL 6 +NULL 1 2 +NULL 2 3 +NULL 3 1 +1 NULL 1 +1 1 1 +2 NULL 2 +2 2 1 +2 3 1 +3 NULL 1 +3 2 1 +5 NULL 1 +5 2 1 +8 NULL 1 +8 1 1 +PREHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### 
+NULL NULL 6 +NULL 1 2 +NULL 2 3 +NULL 3 1 +1 NULL 1 +1 1 1 +2 NULL 2 +2 2 1 +2 3 1 +3 NULL 1 +3 2 1 +5 NULL 1 +5 2 1 +8 NULL 1 +8 1 1 +PREHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +1 NULL 1 +1 1 1 +2 NULL 2 +2 2 1 +2 3 1 +3 NULL 1 +3 2 1 +5 NULL 1 +5 2 1 +8 NULL 1 +8 1 1 +PREHOOK: query: SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +1 +2 +3 +5 +8 +PREHOOK: query: SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +1 +2 +3 +5 +8 +PREHOOK: query: SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +2.0 1 +4.0 1 +5.0 2 +7.0 1 +9.0 1 diff --git ql/src/test/results/clientpositive/groupby_grouping_sets_without_mapaggr.q.out ql/src/test/results/clientpositive/groupby_grouping_sets_without_mapaggr.q.out new file mode 100644 index 0000000..0665eb4 --- /dev/null +++ ql/src/test/results/clientpositive/groupby_grouping_sets_without_mapaggr.q.out @@ 
-0,0 +1,142 @@ +PREHOOK: query: CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1 +PREHOOK: query: SELECT * FROM T1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM T1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +8 1 1 +5 2 2 +1 1 3 +2 2 4 +2 3 5 +3 2 8 +PREHOOK: query: SELECT a, b, count(*) from T1 group by a, b with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, count(*) from T1 group by a, b with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +NULL NULL 6 +NULL 1 2 +NULL 2 3 +NULL 3 1 +1 NULL 1 +1 1 1 +2 NULL 2 +2 2 1 +2 3 1 +3 NULL 1 +3 2 1 +5 NULL 1 +5 2 1 +8 NULL 1 +8 1 1 +PREHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +NULL NULL 6 +NULL 1 2 +NULL 2 3 +NULL 3 1 +1 NULL 1 +1 1 1 +2 NULL 2 +2 2 1 +2 3 1 +3 NULL 1 +3 2 1 +5 
NULL 1 +5 2 1 +8 NULL 1 +8 1 1 +PREHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +1 NULL 1 +1 1 1 +2 NULL 2 +2 2 1 +2 3 1 +3 NULL 1 +3 2 1 +5 NULL 1 +5 2 1 +8 NULL 1 +8 1 1 +PREHOOK: query: SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +1 +2 +3 +5 +8 +PREHOOK: query: SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +1 +2 +3 +5 +8 +PREHOOK: query: SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +2.0 1 +4.0 1 +5.0 2 +7.0 1 +9.0 1